From c5aca0c27be31e94ffdb01ef2eb29d3b373d7f4c Mon Sep 17 00:00:00 2001 From: Glenn Weidner Date: Sun, 10 May 2015 19:18:32 -0700 Subject: [PATCH 01/59] [SPARK-7427] [PYSPARK] Make sharedParams match in Scala, Python Modified 2 files: python/pyspark/ml/param/_shared_params_code_gen.py python/pyspark/ml/param/shared.py Generated shared.py on Linux using Python 2.6.6 on Redhat Enterprise Linux Server 6.6. python _shared_params_code_gen.py > shared.py Only changed maxIter, regParam, rawPredictionCol based on strings from SharedParamsCodeGen.scala. Note warning was displayed when committing shared.py: warning: LF will be replaced by CRLF in python/pyspark/ml/param/shared.py. Author: Glenn Weidner Closes #6023 from gweidner/br-7427 and squashes the following commits: db72e32 [Glenn Weidner] [SPARK-7427] [PySpark] Make sharedParams match in Scala, Python 825e4a9 [Glenn Weidner] [SPARK-7427] [PySpark] Make sharedParams match in Scala, Python e6a865e [Glenn Weidner] [SPARK-7427] [PySpark] Make sharedParams match in Scala, Python 1eee702 [Glenn Weidner] Merge remote-tracking branch 'upstream/master' 1ac10e5 [Glenn Weidner] Merge remote-tracking branch 'upstream/master' cafd104 [Glenn Weidner] Merge remote-tracking branch 'upstream/master' 9bea1eb [Glenn Weidner] Merge remote-tracking branch 'upstream/master' 4a35c20 [Glenn Weidner] Merge remote-tracking branch 'upstream/master' 9790cbe [Glenn Weidner] Merge remote-tracking branch 'upstream/master' d9c30f4 [Glenn Weidner] [SPARK-7275] [SQL] [WIP] Make LogicalRelation public --- .../ml/param/_shared_params_code_gen.py | 6 ++-- python/pyspark/ml/param/shared.py | 30 +++++++++---------- python/pyspark/ml/tests.py | 4 +-- tox.ini | 2 +- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index ed3171b6976d3..3be0979b92013 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -88,12 +88,12 @@ def get$Name(self): print("\n# DO NOT MODIFY THIS FILE! It was generated by _shared_params_code_gen.py.\n") print("from pyspark.ml.param import Param, Params\n\n") shared = [ - ("maxIter", "max number of iterations", None), - ("regParam", "regularization constant", None), + ("maxIter", "max number of iterations (>= 0)", None), + ("regParam", "regularization parameter (>= 0)", None), ("featuresCol", "features column name", "'features'"), ("labelCol", "label column name", "'label'"), ("predictionCol", "prediction column name", "'prediction'"), - ("rawPredictionCol", "raw prediction column name", "'rawPrediction'"), + ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", "'rawPrediction'"), ("inputCol", "input column name", None), ("inputCols", "input column names", None), ("outputCol", "output column name", None), diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index d0bcadee22347..4b22322b895b4 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -22,16 +22,16 @@ class HasMaxIter(Params): """ - Mixin for param maxIter: max number of iterations. + Mixin for param maxIter: max number of iterations (>= 0). 
""" # a placeholder to make it appear in the generated doc - maxIter = Param(Params._dummy(), "maxIter", "max number of iterations") + maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0)") def __init__(self): super(HasMaxIter, self).__init__() - #: param for max number of iterations - self.maxIter = Param(self, "maxIter", "max number of iterations") + #: param for max number of iterations (>= 0) + self.maxIter = Param(self, "maxIter", "max number of iterations (>= 0)") if None is not None: self._setDefault(maxIter=None) @@ -51,16 +51,16 @@ def getMaxIter(self): class HasRegParam(Params): """ - Mixin for param regParam: regularization constant. + Mixin for param regParam: regularization parameter (>= 0). """ # a placeholder to make it appear in the generated doc - regParam = Param(Params._dummy(), "regParam", "regularization constant") + regParam = Param(Params._dummy(), "regParam", "regularization parameter (>= 0)") def __init__(self): super(HasRegParam, self).__init__() - #: param for regularization constant - self.regParam = Param(self, "regParam", "regularization constant") + #: param for regularization parameter (>= 0) + self.regParam = Param(self, "regParam", "regularization parameter (>= 0)") if None is not None: self._setDefault(regParam=None) @@ -167,16 +167,16 @@ def getPredictionCol(self): class HasRawPredictionCol(Params): """ - Mixin for param rawPredictionCol: raw prediction column name. + Mixin for param rawPredictionCol: raw prediction (a.k.a. confidence) column name. """ # a placeholder to make it appear in the generated doc - rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction column name") + rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction (a.k.a. confidence) column name") def __init__(self): super(HasRawPredictionCol, self).__init__() - #: param for raw prediction column name - self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction column name") + #: param for raw prediction (a.k.a. confidence) column name + self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name") if 'rawPrediction' is not None: self._setDefault(rawPredictionCol='rawPrediction') @@ -403,14 +403,12 @@ class HasStepSize(Params): """ # a placeholder to make it appear in the generated doc - stepSize = Param(Params._dummy(), "stepSize", - "Step size to be used for each iteration of optimization.") + stepSize = Param(Params._dummy(), "stepSize", "Step size to be used for each iteration of optimization.") def __init__(self): super(HasStepSize, self).__init__() #: param for Step size to be used for each iteration of optimization. 
- self.stepSize = Param(self, "stepSize", - "Step size to be used for each iteration of optimization.") + self.stepSize = Param(self, "stepSize", "Step size to be used for each iteration of optimization.") if None is not None: self._setDefault(stepSize=None) diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 75bb5d749ca87..ba6478dcd58a9 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -128,7 +128,7 @@ def test_param(self): testParams = TestParams() maxIter = testParams.maxIter self.assertEqual(maxIter.name, "maxIter") - self.assertEqual(maxIter.doc, "max number of iterations") + self.assertEqual(maxIter.doc, "max number of iterations (>= 0)") self.assertTrue(maxIter.parent is testParams) def test_params(self): @@ -156,7 +156,7 @@ def test_params(self): self.assertEquals( testParams.explainParams(), "\n".join(["inputCol: input column name (undefined)", - "maxIter: max number of iterations (default: 10, current: 100)"])) + "maxIter: max number of iterations (>= 0) (default: 10, current: 100)"])) if __name__ == "__main__": diff --git a/tox.ini b/tox.ini index b568029a204cc..76e3f42cde62d 100644 --- a/tox.ini +++ b/tox.ini @@ -15,4 +15,4 @@ [pep8] max-line-length=100 -exclude=cloudpickle.py,heapq3.py +exclude=cloudpickle.py,heapq3.py,shared.py From 0835f1edd4c9c05439df85c248faf6787d45f7b7 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Sun, 10 May 2015 19:49:42 -0700 Subject: [PATCH 02/59] [SPARK-7512] [SPARKR] Fix RDD's show method to use getJRDD Since the RDD object might be a Pipelined RDD we should use `getJRDD` to get the right handle to the Java object. Fixes the bug reported at http://stackoverflow.com/questions/30057702/sparkr-filterrdd-and-flatmap-not-working cc concretevitamin Author: Shivaram Venkataraman Closes #6035 from shivaram/sparkr-show-bug and squashes the following commits: d70145c [Shivaram Venkataraman] Fix RDD's show method to use getJRDD Fixes the bug reported at http://stackoverflow.com/questions/30057702/sparkr-filterrdd-and-flatmap-not-working --- R/pkg/R/RDD.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 73999a6737032..9138629cac9c0 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -67,8 +67,8 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode, }) setMethod("show", "RDD", - function(.Object) { - cat(paste(callJMethod(.Object@jrdd, "toString"), "\n", sep="")) + function(object) { + cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep="")) }) setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) { From 2242ab31e99227a102b0918d73db67e99899fd24 Mon Sep 17 00:00:00 2001 From: tianyi Date: Mon, 11 May 2015 14:08:15 +0800 Subject: [PATCH 03/59] [SPARK-7519] [SQL] fix minor bugs in thrift server UI Bugs description: 1. There are extra commas on the top of session list. 2. The format of time in "Start at:" part is not the same as others. 3. The total number of online sessions is wrong. 
Author: tianyi Closes #6048 from tianyi/SPARK-7519 and squashes the following commits: ed366b7 [tianyi] fix bug --- .../sql/hive/thriftserver/HiveThriftServer2.scala | 4 +++- .../sql/hive/thriftserver/ui/ThriftServerPage.scala | 10 +++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala index 0be5a92c2546c..3458b04bfba0f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala @@ -147,7 +147,7 @@ object HiveThriftServer2 extends Logging { override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { server.stop() } - + var onlineSessionNum: Int = 0 val sessionList = new mutable.LinkedHashMap[String, SessionInfo] val executionList = new mutable.LinkedHashMap[String, ExecutionInfo] val retainedStatements = @@ -170,11 +170,13 @@ object HiveThriftServer2 extends Logging { def onSessionCreated(ip: String, sessionId: String, userName: String = "UNKNOWN"): Unit = { val info = new SessionInfo(sessionId, System.currentTimeMillis, ip, userName) sessionList.put(sessionId, info) + onlineSessionNum += 1 trimSessionIfNecessary() } def onSessionClosed(sessionId: String): Unit = { sessionList(sessionId).finishTimestamp = System.currentTimeMillis + onlineSessionNum -= 1 } def onStatementStart( diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 71b16b6bebffb..6a2be4a58e5cb 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -29,7 +29,7 @@ import org.apache.spark.ui.UIUtils._ import org.apache.spark.ui._ -/** Page for Spark Web UI that shows statistics of a streaming job */ +/** Page for Spark Web UI that shows statistics of a thrift server */ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("") with Logging { private val listener = parent.listener @@ -42,7 +42,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" generateBasicStats() ++
<br/> ++
<h4>
- {listener.sessionList.size} session(s) are online,
+ {listener.onlineSessionNum} session(s) are online,
running {listener.totalRunning} SQL statement(s)
</h4>
++ generateSessionStatsTable() ++ @@ -50,12 +50,12 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" UIUtils.headerSparkPage("ThriftServer", content, parent, Some(5000)) } - /** Generate basic stats of the streaming program */ + /** Generate basic stats of the thrift server program */ private def generateBasicStats(): Seq[Node] = { val timeSinceStart = System.currentTimeMillis() - startTime.getTime
- Started at: {startTime.toString}
+ Started at: {formatDate(startTime)}
  • Time since start: {formatDurationVerbose(timeSinceStart)} @@ -148,7 +148,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" {session.userName} {session.ip} - {session.sessionId} , + {session.sessionId} {formatDate(session.startTimestamp)} {if(session.finishTimestamp > 0) formatDate(session.finishTimestamp)} {formatDurationOption(Some(session.totalTime))} From d70a076892e0677acceccaba665908cdf664f1b4 Mon Sep 17 00:00:00 2001 From: Wesley Miao Date: Mon, 11 May 2015 12:20:06 +0100 Subject: [PATCH 04/59] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doesn't work all the time tdas https://issues.apache.org/jira/browse/SPARK-7326 The problem most likely resides in DStream.slice() implementation, as shown below. def slice(fromTime: Time, toTime: Time): Seq[RDD[T]] = { if (!isInitialized) { throw new SparkException(this + " has not been initialized") } if (!(fromTime - zeroTime).isMultipleOf(slideDuration)) { logWarning("fromTime (" + fromTime + ") is not a multiple of slideDuration (" + slideDuration + ")") } if (!(toTime - zeroTime).isMultipleOf(slideDuration)) { logWarning("toTime (" + fromTime + ") is not a multiple of slideDuration (" + slideDuration + ")") } val alignedToTime = toTime.floor(slideDuration, zeroTime) val alignedFromTime = fromTime.floor(slideDuration, zeroTime) logInfo("Slicing from " + fromTime + " to " + toTime + " (aligned to " + alignedFromTime + " and " + alignedToTime + ")") alignedFromTime.to(alignedToTime, slideDuration).flatMap(time => { if (time >= zeroTime) getOrCompute(time) else None }) } Here after performing floor() on both fromTime and toTime, the result (alignedFromTime - zeroTime) and (alignedToTime - zeroTime) may no longer be multiple of the slidingDuration, thus making isTimeValid() check failed for all the remaining computation. The fix is to add a new floor() function in Time.scala to respect the zeroTime while performing the floor : def floor(that: Duration, zeroTime: Time): Time = { val t = that.milliseconds new Time(((this.millis - zeroTime.milliseconds) / t) * t + zeroTime.milliseconds) } And then change the DStream.slice to call this new floor function by passing in its zeroTime. val alignedToTime = toTime.floor(slideDuration, zeroTime) val alignedFromTime = fromTime.floor(slideDuration, zeroTime) This way the alignedToTime and alignedFromTime are *really* aligned in respect to zeroTime whose value is not really a 0. 
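For readers skimming the patch, here is a small self-contained Scala sketch (not part of the patch itself; the stripped-down Time and Duration classes below are stand-ins for the real ones in org.apache.spark.streaming) that walks through the zeroTime-aware floor with the same numbers used in the new TimeSuite assertions:

object FloorSketch {
  final case class Duration(milliseconds: Long)
  final case class Time(millis: Long) {
    // mirrors the floor(that, zeroTime) overload described above
    def floor(that: Duration, zeroTime: Time): Time = {
      val t = that.milliseconds
      Time(((this.millis - zeroTime.millis) / t) * t + zeroTime.millis)
    }
  }

  def main(args: Array[String]): Unit = {
    val zeroTime = Time(50)     // stream start time; not a multiple of the slide duration
    val slide = Duration(200)   // slideDuration
    // ((1350 - 50) / 200) * 200 + 50 = 1250, i.e. aligned to 50 + k * 200 rather than k * 200
    println(Time(1350).floor(slide, zeroTime))   // Time(1250)
  }
}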
Author: Wesley Miao Author: Wesley Closes #5871 from wesleymiao/spark-7326 and squashes the following commits: 82a4d8c [Wesley Miao] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream dosen't work all the time 48b4dc0 [Wesley] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doesn't work all the time 6ade399 [Wesley] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doesn't work all the time 2611745 [Wesley Miao] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doesn't work all the time --- .../org/apache/spark/streaming/Time.scala | 5 +++++ .../spark/streaming/dstream/DStream.scala | 22 ++++++++++++------- .../apache/spark/streaming/TimeSuite.scala | 3 +++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Time.scala b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala index 42c49678d24f0..92cfd7d40338c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Time.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala @@ -63,6 +63,11 @@ case class Time(private val millis: Long) { new Time((this.millis / t) * t) } + def floor(that: Duration, zeroTime: Time): Time = { + val t = that.milliseconds + new Time(((this.millis - zeroTime.milliseconds) / t) * t + zeroTime.milliseconds) + } + def isMultipleOf(that: Duration): Boolean = (this.millis % that.milliseconds == 0) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index f1f8a70655996..7092a3d3f0b86 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -763,16 +763,22 @@ abstract class DStream[T: ClassTag] ( if (!isInitialized) { throw new SparkException(this + " has not been initialized") } - if (!(fromTime - zeroTime).isMultipleOf(slideDuration)) { - logWarning("fromTime (" + fromTime + ") is not a multiple of slideDuration (" - + slideDuration + ")") + + val alignedToTime = if ((toTime - zeroTime).isMultipleOf(slideDuration)) { + toTime + } else { + logWarning("toTime (" + toTime + ") is not a multiple of slideDuration (" + + slideDuration + ")") + toTime.floor(slideDuration, zeroTime) } - if (!(toTime - zeroTime).isMultipleOf(slideDuration)) { - logWarning("toTime (" + fromTime + ") is not a multiple of slideDuration (" - + slideDuration + ")") + + val alignedFromTime = if ((fromTime - zeroTime).isMultipleOf(slideDuration)) { + fromTime + } else { + logWarning("fromTime (" + fromTime + ") is not a multiple of slideDuration (" + + slideDuration + ")") + fromTime.floor(slideDuration, zeroTime) } - val alignedToTime = toTime.floor(slideDuration) - val alignedFromTime = fromTime.floor(slideDuration) logInfo("Slicing from " + fromTime + " to " + toTime + " (aligned to " + alignedFromTime + " and " + alignedToTime + ")") diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala index 5579ac364346c..e6a01656f479d 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala @@ -69,6 +69,9 @@ class TimeSuite extends TestSuiteBase { assert(new Time(1200).floor(new Duration(200)) == new Time(1200)) assert(new Time(199).floor(new Duration(200)) == new Time(0)) assert(new 
Time(1).floor(new Duration(1)) == new Time(1)) + assert(new Time(1350).floor(new Duration(200), new Time(50)) == new Time(1250)) + assert(new Time(1350).floor(new Duration(200), new Time(150)) == new Time(1350)) + assert(new Time(1350).floor(new Duration(200), new Time(200)) == new Time(1200)) } test("isMultipleOf") { From 042dda3c5c25b5ecb6ae4fd37c85b211b01c187b Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Mon, 11 May 2015 09:14:20 -0700 Subject: [PATCH 05/59] [SPARK-6092] [MLLIB] Add RankingMetrics in PySpark/MLlib Author: Yanbo Liang Closes #6044 from yanboliang/spark-6092 and squashes the following commits: 726a9b1 [Yanbo Liang] add newRankingMetrics 33f649c [Yanbo Liang] Add RankingMetrics in PySpark/MLlib --- .../mllib/api/python/PythonMLLibAPI.scala | 10 +++ python/pyspark/mllib/evaluation.py | 78 ++++++++++++++++++- 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 8c30ad4b391ae..f4c477596557f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -32,6 +32,7 @@ import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.python.SerDeUtil import org.apache.spark.mllib.classification._ import org.apache.spark.mllib.clustering._ +import org.apache.spark.mllib.evaluation.RankingMetrics import org.apache.spark.mllib.feature._ import org.apache.spark.mllib.fpm.{FPGrowth, FPGrowthModel} import org.apache.spark.mllib.linalg._ @@ -50,6 +51,7 @@ import org.apache.spark.mllib.tree.model.{DecisionTreeModel, GradientBoostedTree import org.apache.spark.mllib.tree.{DecisionTree, GradientBoostedTrees, RandomForest} import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils @@ -923,6 +925,14 @@ private[python] class PythonMLLibAPI extends Serializable { RG.gammaVectorRDD(jsc.sc, shape, scale, numRows, numCols, parts, s) } + /** + * Java stub for the constructor of Python mllib RankingMetrics + */ + def newRankingMetrics(predictionAndLabels: DataFrame): RankingMetrics[Any] = { + new RankingMetrics(predictionAndLabels.map( + r => (r.getSeq(0).toArray[Any], r.getSeq(1).toArray[Any]))) + } + } diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 36914597de228..4c777f2180dc9 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -15,9 +15,12 @@ # limitations under the License. # -from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc from pyspark.sql import SQLContext -from pyspark.sql.types import StructField, StructType, DoubleType +from pyspark.sql.types import StructField, StructType, DoubleType, IntegerType, ArrayType + +__all__ = ['BinaryClassificationMetrics', 'RegressionMetrics', + 'MulticlassMetrics', 'RankingMetrics'] class BinaryClassificationMetrics(JavaModelWrapper): @@ -270,6 +273,77 @@ def weightedFMeasure(self, beta=None): return self.call("weightedFMeasure", beta) +class RankingMetrics(JavaModelWrapper): + """ + Evaluator for ranking algorithms. + + >>> predictionAndLabels = sc.parallelize([ + ... ([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]), + ... 
([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]), + ... ([1, 2, 3, 4, 5], [])]) + >>> metrics = RankingMetrics(predictionAndLabels) + >>> metrics.precisionAt(1) + 0.33... + >>> metrics.precisionAt(5) + 0.26... + >>> metrics.precisionAt(15) + 0.17... + >>> metrics.meanAveragePrecision + 0.35... + >>> metrics.ndcgAt(3) + 0.33... + >>> metrics.ndcgAt(10) + 0.48... + + """ + + def __init__(self, predictionAndLabels): + """ + :param predictionAndLabels: an RDD of (predicted ranking, ground truth set) pairs. + """ + sc = predictionAndLabels.ctx + sql_ctx = SQLContext(sc) + df = sql_ctx.createDataFrame(predictionAndLabels, + schema=sql_ctx._inferSchema(predictionAndLabels)) + java_model = callMLlibFunc("newRankingMetrics", df._jdf) + super(RankingMetrics, self).__init__(java_model) + + def precisionAt(self, k): + """ + Compute the average precision of all the queries, truncated at ranking position k. + + If for a query, the ranking algorithm returns n (n < k) results, the precision value + will be computed as #(relevant items retrieved) / k. This formula also applies when + the size of the ground truth set is less than k. + + If a query has an empty ground truth set, zero will be used as precision together + with a log warning. + """ + return self.call("precisionAt", int(k)) + + @property + def meanAveragePrecision(self): + """ + Returns the mean average precision (MAP) of all the queries. + If a query has an empty ground truth set, the average precision will be zero and + a log warining is generated. + """ + return self.call("meanAveragePrecision") + + def ndcgAt(self, k): + """ + Compute the average NDCG value of all the queries, truncated at ranking position k. + The discounted cumulative gain at position k is computed as: + sum,,i=1,,^k^ (2^{relevance of ''i''th item}^ - 1) / log(i + 1), + and the NDCG is obtained by dividing the DCG value on the ground truth set. + In the current implementation, the relevance value is binary. + + If a query has an empty ground truth set, zero will be used as ndcg together with + a log warning. + """ + return self.call("ndcgAt", int(k)) + + def _test(): import doctest from pyspark import SparkContext From 4f8a15519267ac205424270155254382cc2d3690 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Mon, 11 May 2015 09:23:47 -0700 Subject: [PATCH 06/59] [SPARK-7522] [EXAMPLES] Removed angle brackets from dataFormat option As is, to specify this option on command line, you have to escape the angle brackets. Author: Bryan Cutler Closes #6049 from BryanCutler/dataFormat-option-7522 and squashes the following commits: b34afb4 [Bryan Cutler] [SPARK-7522] Removed angle brackets from dataFormat option --- .../org/apache/spark/examples/ml/DecisionTreeExample.scala | 2 +- .../main/scala/org/apache/spark/examples/ml/GBTExample.scala | 2 +- .../org/apache/spark/examples/ml/RandomForestExample.scala | 2 +- .../org/apache/spark/examples/mllib/DecisionTreeRunner.scala | 2 +- .../spark/examples/mllib/GradientBoostedTreesRunner.scala | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index 8340d91101ab3..54e4073941056 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala @@ -112,7 +112,7 @@ object DecisionTreeExample { .text(s"input path to test dataset. 
If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala index c5899b6683c79..33905277c7341 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala @@ -111,7 +111,7 @@ object GBTExample { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala index 7f88d2681bcaa..9f7cad68a4594 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala @@ -117,7 +117,7 @@ object RandomForestExample { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index 262fd2c9611d0..b0613632c9946 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -126,7 +126,7 @@ object DecisionTreeRunner { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala index 0763a7736305a..7416fb5a40848 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala @@ -69,7 +69,7 @@ object GradientBoostedTreesRunner { .text(s"input path to test dataset. If given, option fracTest is ignored." 
+ s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") From 1b46556999ca126cb593ef052d24afcb75383223 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Mon, 11 May 2015 10:58:56 -0700 Subject: [PATCH 07/59] [SPARK-7361] [STREAMING] Throw unambiguous exception when attempting to start multiple StreamingContexts in the same JVM Currently attempt to start a streamingContext while another one is started throws a confusing exception that the action name JobScheduler is already registered. Instead its best to throw a proper exception as it is not supported. Author: Tathagata Das Closes #5907 from tdas/SPARK-7361 and squashes the following commits: fb81c4a [Tathagata Das] Fix typo a9cd5bb [Tathagata Das] Added startSite to StreamingContext 5fdfc0d [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7361 5870e2b [Tathagata Das] Added check for multiple streaming contexts --- .../spark/streaming/StreamingContext.scala | 48 +++++++++++++++---- .../streaming/StreamingContextSuite.scala | 18 +++++++ 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index bbdb4e8af036c..5abe1367752d9 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -18,7 +18,7 @@ package org.apache.spark.streaming import java.io.InputStream -import java.util.concurrent.atomic.AtomicInteger +import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} import scala.collection.Map import scala.collection.mutable.Queue @@ -28,8 +28,9 @@ import akka.actor.{Props, SupervisorStrategy} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.{BytesWritable, LongWritable, Text} -import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.hadoop.mapreduce.lib.input.TextInputFormat +import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} + import org.apache.spark._ import org.apache.spark.annotation.Experimental import org.apache.spark.input.FixedLengthBinaryInputFormat @@ -37,8 +38,9 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.dstream._ import org.apache.spark.streaming.receiver.{ActorReceiver, ActorSupervisorStrategy, Receiver} -import org.apache.spark.streaming.scheduler._ +import org.apache.spark.streaming.scheduler.{JobScheduler, StreamingListener} import org.apache.spark.streaming.ui.{StreamingJobProgressListener, StreamingTab} +import org.apache.spark.util.CallSite /** * Main entry point for Spark Streaming functionality. It provides methods used to create @@ -202,6 +204,8 @@ class StreamingContext private[streaming] ( import StreamingContextState._ private[streaming] var state = Initialized + private val startSite = new AtomicReference[CallSite](null) + /** * Return the associated Spark context */ @@ -518,6 +522,7 @@ class StreamingContext private[streaming] ( * @throws SparkException if the context has already been started or stopped. 
*/ def start(): Unit = synchronized { + import StreamingContext._ if (state == Started) { throw new SparkException("StreamingContext has already been started") } @@ -525,10 +530,15 @@ class StreamingContext private[streaming] ( throw new SparkException("StreamingContext has already been stopped") } validate() - sparkContext.setCallSite(DStream.getCreationSite()) - scheduler.start() - uiTab.foreach(_.attach()) - state = Started + startSite.set(DStream.getCreationSite()) + sparkContext.setCallSite(startSite.get) + ACTIVATION_LOCK.synchronized { + assertNoOtherContextIsActive() + scheduler.start() + uiTab.foreach(_.attach()) + state = Started + setActiveContext(this) + } } /** @@ -603,6 +613,7 @@ class StreamingContext private[streaming] ( uiTab.foreach(_.detach()) // The state should always be Stopped after calling `stop()`, even if we haven't started yet: state = Stopped + StreamingContext.setActiveContext(null) } } @@ -612,8 +623,29 @@ class StreamingContext private[streaming] ( */ object StreamingContext extends Logging { + /** + * Lock that guards access to global variables that track active StreamingContext. + */ + private val ACTIVATION_LOCK = new Object() - private[streaming] val DEFAULT_CLEANER_TTL = 3600 + private val activeContext = new AtomicReference[StreamingContext](null) + + private def assertNoOtherContextIsActive(): Unit = { + ACTIVATION_LOCK.synchronized { + if (activeContext.get() != null) { + throw new SparkException( + "Only one StreamingContext may be started in this JVM. " + + "Currently running StreamingContext was started at" + + activeContext.get.startSite.get.longForm) + } + } + } + + private def setActiveContext(ssc: StreamingContext): Unit = { + ACTIVATION_LOCK.synchronized { + activeContext.set(ssc) + } + } @deprecated("Replaced by implicit functions in the DStream companion object. This is " + "kept here only for backward compatibility.", "1.3.0") diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index a589deb1fa579..11c7fd835bfcd 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -480,6 +480,24 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w } } + test("multiple streaming contexts") { + sc = new SparkContext(new SparkConf().setMaster(master).setAppName(appName)) + ssc = new StreamingContext(sc, Seconds(1)) + val input = addInputStream(ssc) + input.foreachRDD { rdd => rdd.count } + ssc.start() + + // Creating another streaming context should not create errors + val anotherSsc = new StreamingContext(sc, Seconds(10)) + val anotherInput = addInputStream(anotherSsc) + anotherInput.foreachRDD { rdd => rdd.count } + + val exception = intercept[SparkException] { + anotherSsc.start() + } + assert(exception.getMessage.contains("StreamingContext"), "Did not get the right exception") + } + test("DStream and generated RDD creation sites") { testPackage.test() } From 0a4844f90a712e796c9404b422cea76d21a5d2e3 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 11 May 2015 11:35:16 -0700 Subject: [PATCH 08/59] [SPARK-7462] By default retain group by columns in aggregate Updated Java, Scala, Python, and R. 
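To make the behavioral change concrete, here is a hedged, self-contained sketch (illustrative only, not code taken from this patch) showing what GroupedData.agg returns before and after flipping the new spark.sql.retainGroupColumns flag:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions._

object RetainGroupColumnsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("retain-group-columns"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    val df = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2))).toDF("a", "b")

    // New default (spark.sql.retainGroupColumns = true): the grouping column "a" is kept,
    // so the result rows are (1,3), (2,3), (3,3).
    df.groupBy("a").agg(sum($"b")).show()

    // The old behavior (only the aggregate columns) is still available via the config flag.
    sqlContext.setConf("spark.sql.retainGroupColumns", "false")
    df.groupBy("a").agg(sum($"b")).show()

    sc.stop()
  }
}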
Author: Reynold Xin Author: Shivaram Venkataraman Closes #5996 from rxin/groupby-retain and squashes the following commits: aac7119 [Reynold Xin] Merge branch 'groupby-retain' of github.com:rxin/spark into groupby-retain f6858f6 [Reynold Xin] Merge branch 'master' into groupby-retain 5f923c0 [Reynold Xin] Merge pull request #15 from shivaram/sparkr-groupby-retrain c1de670 [Shivaram Venkataraman] Revert workaround in SparkR to retain grouped cols Based on reverting code added in commit https://github.com/amplab-extras/spark/commit/9a6be746efc9fafad88122fa2267862ef87aa0e1 b8b87e1 [Reynold Xin] Fixed DataFrameJoinSuite. d910141 [Reynold Xin] Updated rest of the files 1e6e666 [Reynold Xin] [SPARK-7462] By default retain group by columns in aggregate --- R/pkg/R/group.R | 4 +- python/pyspark/sql/dataframe.py | 2 +- .../org/apache/spark/sql/GroupedData.scala | 15 +- .../scala/org/apache/spark/sql/SQLConf.scala | 6 + .../org/apache/spark/sql/api/r/SQLUtils.scala | 11 - .../sql/execution/stat/StatFunctions.scala | 2 +- .../spark/sql/DataFrameAggregateSuite.scala | 193 ++++++++++++++++++ .../apache/spark/sql/DataFrameJoinSuite.scala | 4 +- .../org/apache/spark/sql/DataFrameSuite.scala | 151 +------------- .../scala/org/apache/spark/sql/TestData.scala | 2 - 10 files changed, 218 insertions(+), 172 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 5a7a8a2caba13..b758481997574 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -102,9 +102,7 @@ setMethod("agg", } } jcols <- lapply(cols, function(c) { c@jc }) - # the GroupedData.agg(col, cols*) API does not contain grouping Column - sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "aggWithGrouping", - x@sgd, listToSeq(jcols)) + sdf <- callJMethod(x@sgd, "agg", jcols[[1]], listToSeq(jcols[-1])) } else { stop("agg can only support Column or character") } diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index a9697999e82cb..c2fa6c87384a6 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1069,7 +1069,7 @@ def agg(self, *exprs): >>> from pyspark.sql import functions as F >>> gdf.agg(F.min(df.age)).collect() - [Row(MIN(age)=2), Row(MIN(age)=5)] + [Row(name=u'Alice', MIN(age)=2), Row(name=u'Bob', MIN(age)=5)] """ assert exprs, "exprs should not be empty" if len(exprs) == 1 and isinstance(exprs[0], dict): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 53ad67372e024..003a620dcc8ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -135,8 +135,9 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) } /** - * Compute aggregates by specifying a series of aggregate columns. Unlike other methods in this - * class, the resulting [[DataFrame]] won't automatically include the grouping columns. + * Compute aggregates by specifying a series of aggregate columns. Note that this function by + * default retains the grouping columns in its output. To not retain grouping columns, set + * `spark.sql.retainGroupColumns` to false. * * The available aggregate methods are defined in [[org.apache.spark.sql.functions]]. 
* @@ -158,7 +159,15 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) case expr: NamedExpression => expr case expr: Expression => Alias(expr, expr.prettyString)() } - DataFrame(df.sqlContext, Aggregate(groupingExprs, aggExprs, df.logicalPlan)) + if (df.sqlContext.conf.dataFrameRetainGroupColumns) { + val retainedExprs = groupingExprs.map { + case expr: NamedExpression => expr + case expr: Expression => Alias(expr, expr.prettyString)() + } + DataFrame(df.sqlContext, Aggregate(groupingExprs, retainedExprs ++ aggExprs, df.logicalPlan)) + } else { + DataFrame(df.sqlContext, Aggregate(groupingExprs, aggExprs, df.logicalPlan)) + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index 98a75bb4ed2df..dcac97beafb04 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -74,6 +74,9 @@ private[spark] object SQLConf { // See SPARK-6231. val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY = "spark.sql.selfJoinAutoResolveAmbiguity" + // Whether to retain group by columns or not in GroupedData.agg. + val DATAFRAME_RETAIN_GROUP_COLUMNS = "spark.sql.retainGroupColumns" + val USE_SQL_SERIALIZER2 = "spark.sql.useSerializer2" val USE_JACKSON_STREAMING_API = "spark.sql.json.useJacksonStreamingAPI" @@ -242,6 +245,9 @@ private[sql] class SQLConf extends Serializable with CatalystConf { private[spark] def dataFrameSelfJoinAutoResolveAmbiguity: Boolean = getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY, "true").toBoolean + + private[spark] def dataFrameRetainGroupColumns: Boolean = + getConf(DATAFRAME_RETAIN_GROUP_COLUMNS, "true").toBoolean /** ********************** SQLConf functionality methods ************ */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index ae77f72998a22..423ecdff5804a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -72,17 +72,6 @@ private[r] object SQLUtils { sqlContext.createDataFrame(rowRDD, schema) } - // A helper to include grouping columns in Agg() - def aggWithGrouping(gd: GroupedData, exprs: Column*): DataFrame = { - val aggExprs = exprs.map { col => - col.expr match { - case expr: NamedExpression => expr - case expr: Expression => Alias(expr, expr.simpleString)() - } - } - gd.toDF(aggExprs) - } - def dfToRowRDD(df: DataFrame): JavaRDD[Array[Byte]] = { df.map(r => rowToRBytes(r)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index 71b7f6c2a6756..d22f5fd2d439c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -104,7 +104,7 @@ private[sql] object StatFunctions extends Logging { /** Generate a table of frequencies for the elements of two columns. 
*/ private[sql] def crossTabulate(df: DataFrame, col1: String, col2: String): DataFrame = { val tableName = s"${col1}_$col2" - val counts = df.groupBy(col1, col2).agg(col(col1), col(col2), count("*")).take(1e6.toInt) + val counts = df.groupBy(col1, col2).agg(count("*")).take(1e6.toInt) if (counts.length == 1e6.toInt) { logWarning("The maximum limit of 1e6 pairs have been collected, which may not be all of " + "the pairs. Please try reducing the amount of distinct items in your columns.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala new file mode 100644 index 0000000000000..35a574f354741 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.TestData._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.TestSQLContext +import org.apache.spark.sql.test.TestSQLContext.implicits._ +import org.apache.spark.sql.types.DecimalType + + +class DataFrameAggregateSuite extends QueryTest { + + test("groupBy") { + checkAnswer( + testData2.groupBy("a").agg(sum($"b")), + Seq(Row(1, 3), Row(2, 3), Row(3, 3)) + ) + checkAnswer( + testData2.groupBy("a").agg(sum($"b").as("totB")).agg(sum('totB)), + Row(9) + ) + checkAnswer( + testData2.groupBy("a").agg(count("*")), + Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil + ) + checkAnswer( + testData2.groupBy("a").agg(Map("*" -> "count")), + Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil + ) + checkAnswer( + testData2.groupBy("a").agg(Map("b" -> "sum")), + Row(1, 3) :: Row(2, 3) :: Row(3, 3) :: Nil + ) + + val df1 = Seq(("a", 1, 0, "b"), ("b", 2, 4, "c"), ("a", 2, 3, "d")) + .toDF("key", "value1", "value2", "rest") + + checkAnswer( + df1.groupBy("key").min(), + df1.groupBy("key").min("value1", "value2").collect() + ) + checkAnswer( + df1.groupBy("key").min("value2"), + Seq(Row("a", 0), Row("b", 4)) + ) + } + + test("spark.sql.retainGroupColumns config") { + checkAnswer( + testData2.groupBy("a").agg(sum($"b")), + Seq(Row(1, 3), Row(2, 3), Row(3, 3)) + ) + + TestSQLContext.conf.setConf("spark.sql.retainGroupColumns", "false") + checkAnswer( + testData2.groupBy("a").agg(sum($"b")), + Seq(Row(3), Row(3), Row(3)) + ) + TestSQLContext.conf.setConf("spark.sql.retainGroupColumns", "true") + } + + test("agg without groups") { + checkAnswer( + testData2.agg(sum('b)), + Row(9) + ) + } + + test("average") { + checkAnswer( + testData2.agg(avg('a)), + Row(2.0)) + + // Also check mean + checkAnswer( + testData2.agg(mean('a)), + Row(2.0)) + + checkAnswer( + testData2.agg(avg('a), sumDistinct('a)), // non-partial + Row(2.0, 6.0) :: Nil) 
+ + checkAnswer( + decimalData.agg(avg('a)), + Row(new java.math.BigDecimal(2.0))) + checkAnswer( + decimalData.agg(avg('a), sumDistinct('a)), // non-partial + Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) + + checkAnswer( + decimalData.agg(avg('a cast DecimalType(10, 2))), + Row(new java.math.BigDecimal(2.0))) + // non-partial + checkAnswer( + decimalData.agg(avg('a cast DecimalType(10, 2)), sumDistinct('a cast DecimalType(10, 2))), + Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) + } + + test("null average") { + checkAnswer( + testData3.agg(avg('b)), + Row(2.0)) + + checkAnswer( + testData3.agg(avg('b), countDistinct('b)), + Row(2.0, 1)) + + checkAnswer( + testData3.agg(avg('b), sumDistinct('b)), // non-partial + Row(2.0, 2.0)) + } + + test("zero average") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + checkAnswer( + emptyTableData.agg(avg('a)), + Row(null)) + + checkAnswer( + emptyTableData.agg(avg('a), sumDistinct('b)), // non-partial + Row(null, null)) + } + + test("count") { + assert(testData2.count() === testData2.map(_ => 1).count()) + + checkAnswer( + testData2.agg(count('a), sumDistinct('a)), // non-partial + Row(6, 6.0)) + } + + test("null count") { + checkAnswer( + testData3.groupBy('a).agg(count('b)), + Seq(Row(1,0), Row(2, 1)) + ) + + checkAnswer( + testData3.groupBy('a).agg(count('a + 'b)), + Seq(Row(1,0), Row(2, 1)) + ) + + checkAnswer( + testData3.agg(count('a), count('b), count(lit(1)), countDistinct('a), countDistinct('b)), + Row(2, 1, 2, 2, 1) + ) + + checkAnswer( + testData3.agg(count('b), countDistinct('b), sumDistinct('b)), // non-partial + Row(1, 1, 2) + ) + } + + test("zero count") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + assert(emptyTableData.count() === 0) + + checkAnswer( + emptyTableData.agg(count('a), sumDistinct('a)), // non-partial + Row(0, null)) + } + + test("zero sum") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + checkAnswer( + emptyTableData.agg(sum('a)), + Row(null)) + } + + test("zero sum distinct") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + checkAnswer( + emptyTableData.agg(sumDistinct('a)), + Row(null)) + } + +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index f005f55b6432e..787f3f175fea2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -77,8 +77,8 @@ class DataFrameJoinSuite extends QueryTest { df.join(df, df("key") === df("key") && df("value") === 1), Row(1, "1", 1, "1") :: Nil) - val left = df.groupBy("key").agg($"key", count("*")) - val right = df.groupBy("key").agg($"key", sum("key")) + val left = df.groupBy("key").agg(count("*")) + val right = df.groupBy("key").agg(sum("key")) checkAnswer( left.join(right, left("key") === right("key")), Row(1, 1, 1, 1) :: Row(2, 1, 2, 2) :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index cf590cbd5219c..7552c1288165f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -22,7 +22,6 @@ import scala.language.postfixOps import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.sql.test.{ExamplePointUDT, ExamplePoint, 
TestSQLContext} -import org.apache.spark.sql.test.TestSQLContext.logicalPlanToSparkQuery import org.apache.spark.sql.test.TestSQLContext.implicits._ @@ -63,7 +62,7 @@ class DataFrameSuite extends QueryTest { val df = Seq((1,(1,1))).toDF() checkAnswer( - df.groupBy("_1").agg(col("_1"), sum("_2._1")).toDF("key", "total"), + df.groupBy("_1").agg(sum("_2._1")).toDF("key", "total"), Row(1, 1) :: Nil) } @@ -128,7 +127,7 @@ class DataFrameSuite extends QueryTest { df2 .select('_1 as 'letter, 'number) .groupBy('letter) - .agg('letter, countDistinct('number)), + .agg(countDistinct('number)), Row("a", 3) :: Row("b", 2) :: Row("c", 1) :: Nil ) } @@ -165,48 +164,6 @@ class DataFrameSuite extends QueryTest { testData.select('key).collect().toSeq) } - test("groupBy") { - checkAnswer( - testData2.groupBy("a").agg($"a", sum($"b")), - Seq(Row(1, 3), Row(2, 3), Row(3, 3)) - ) - checkAnswer( - testData2.groupBy("a").agg($"a", sum($"b").as("totB")).agg(sum('totB)), - Row(9) - ) - checkAnswer( - testData2.groupBy("a").agg(col("a"), count("*")), - Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil - ) - checkAnswer( - testData2.groupBy("a").agg(Map("*" -> "count")), - Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil - ) - checkAnswer( - testData2.groupBy("a").agg(Map("b" -> "sum")), - Row(1, 3) :: Row(2, 3) :: Row(3, 3) :: Nil - ) - - val df1 = Seq(("a", 1, 0, "b"), ("b", 2, 4, "c"), ("a", 2, 3, "d")) - .toDF("key", "value1", "value2", "rest") - - checkAnswer( - df1.groupBy("key").min(), - df1.groupBy("key").min("value1", "value2").collect() - ) - checkAnswer( - df1.groupBy("key").min("value2"), - Seq(Row("a", 0), Row("b", 4)) - ) - } - - test("agg without groups") { - checkAnswer( - testData2.agg(sum('b)), - Row(9) - ) - } - test("convert $\"attribute name\" into unresolved attribute") { checkAnswer( testData.where($"key" === lit(1)).select($"value"), @@ -303,110 +260,6 @@ class DataFrameSuite extends QueryTest { mapData.take(1).map(r => Row.fromSeq(r.productIterator.toSeq))) } - test("average") { - checkAnswer( - testData2.agg(avg('a)), - Row(2.0)) - - // Also check mean - checkAnswer( - testData2.agg(mean('a)), - Row(2.0)) - - checkAnswer( - testData2.agg(avg('a), sumDistinct('a)), // non-partial - Row(2.0, 6.0) :: Nil) - - checkAnswer( - decimalData.agg(avg('a)), - Row(new java.math.BigDecimal(2.0))) - checkAnswer( - decimalData.agg(avg('a), sumDistinct('a)), // non-partial - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) - - checkAnswer( - decimalData.agg(avg('a cast DecimalType(10, 2))), - Row(new java.math.BigDecimal(2.0))) - // non-partial - checkAnswer( - decimalData.agg(avg('a cast DecimalType(10, 2)), sumDistinct('a cast DecimalType(10, 2))), - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) - } - - test("null average") { - checkAnswer( - testData3.agg(avg('b)), - Row(2.0)) - - checkAnswer( - testData3.agg(avg('b), countDistinct('b)), - Row(2.0, 1)) - - checkAnswer( - testData3.agg(avg('b), sumDistinct('b)), // non-partial - Row(2.0, 2.0)) - } - - test("zero average") { - checkAnswer( - emptyTableData.agg(avg('a)), - Row(null)) - - checkAnswer( - emptyTableData.agg(avg('a), sumDistinct('b)), // non-partial - Row(null, null)) - } - - test("count") { - assert(testData2.count() === testData2.map(_ => 1).count()) - - checkAnswer( - testData2.agg(count('a), sumDistinct('a)), // non-partial - Row(6, 6.0)) - } - - test("null count") { - checkAnswer( - testData3.groupBy('a).agg('a, count('b)), - Seq(Row(1,0), Row(2, 1)) - ) - - checkAnswer( - testData3.groupBy('a).agg('a, 
count('a + 'b)), - Seq(Row(1,0), Row(2, 1)) - ) - - checkAnswer( - testData3.agg(count('a), count('b), count(lit(1)), countDistinct('a), countDistinct('b)), - Row(2, 1, 2, 2, 1) - ) - - checkAnswer( - testData3.agg(count('b), countDistinct('b), sumDistinct('b)), // non-partial - Row(1, 1, 2) - ) - } - - test("zero count") { - assert(emptyTableData.count() === 0) - - checkAnswer( - emptyTableData.agg(count('a), sumDistinct('a)), // non-partial - Row(0, null)) - } - - test("zero sum") { - checkAnswer( - emptyTableData.agg(sum('a)), - Row(null)) - } - - test("zero sum distinct") { - checkAnswer( - emptyTableData.agg(sumDistinct('a)), - Row(null)) - } - test("except") { checkAnswer( lowerCaseData.except(upperCaseData), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala index 225b51bd73d6c..446771ab2a5a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala @@ -86,8 +86,6 @@ object TestData { TestData3(2, Some(2)) :: Nil).toDF() testData3.registerTempTable("testData3") - val emptyTableData = logical.LocalRelation($"a".int, $"b".int) - case class UpperCaseData(N: Int, L: String) val upperCaseData = TestSQLContext.sparkContext.parallelize( From 82fee9d9aad2c9ba2fb4bd658579fe99218cafac Mon Sep 17 00:00:00 2001 From: Sandy Ryza Date: Mon, 11 May 2015 12:09:39 -0700 Subject: [PATCH 09/59] [SPARK-6470] [YARN] Add support for YARN node labels. This is difficult to write a test for because it relies on the latest version of YARN, but I verified manually that the patch does pass along the label expression on this version and containers are successfully launched. Author: Sandy Ryza Closes #5242 from sryza/sandy-spark-6470 and squashes the following commits: 6af87b9 [Sandy Ryza] Change info to warning 6e22d99 [Sandy Ryza] [YARN] SPARK-6470. Add support for YARN node labels. --- docs/running-on-yarn.md | 9 ++++++ .../spark/deploy/yarn/YarnAllocator.scala | 31 ++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 4fb4a90307ec8..51c1339165024 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -220,6 +220,15 @@ Most of the configs are the same for Spark on YARN as for other deployment modes Otherwise, the client process will exit after submission. + + spark.yarn.executor.nodeLabelExpression + (none) + + A YARN node label expression that restricts the set of nodes executors will be scheduled on. + Only versions of YARN greater than or equal to 2.6 support node label expressions, so when + running against earlier versions, this property will be ignored. + + # Launching Spark on YARN diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 88d68d5556162..8a08f561a2df2 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -117,6 +117,24 @@ private[yarn] class YarnAllocator( // For testing private val launchContainers = sparkConf.getBoolean("spark.yarn.launchContainers", true) + private val labelExpression = sparkConf.getOption("spark.yarn.executor.nodeLabelExpression") + + // ContainerRequest constructor that can take a node label expression. We grab it through + // reflection because it's only available in later versions of YARN. 
+ private val nodeLabelConstructor = labelExpression.flatMap { expr => + try { + Some(classOf[ContainerRequest].getConstructor(classOf[Resource], + classOf[Array[String]], classOf[Array[String]], classOf[Priority], classOf[Boolean], + classOf[String])) + } catch { + case e: NoSuchMethodException => { + logWarning(s"Node label expression $expr will be ignored because YARN version on" + + " classpath does not support it.") + None + } + } + } + def getNumExecutorsRunning: Int = numExecutorsRunning def getNumExecutorsFailed: Int = numExecutorsFailed @@ -211,7 +229,7 @@ private[yarn] class YarnAllocator( s"cores and ${resource.getMemory} MB memory including $memoryOverhead MB overhead") for (i <- 0 until missing) { - val request = new ContainerRequest(resource, null, null, RM_REQUEST_PRIORITY) + val request = createContainerRequest(resource) amClient.addContainerRequest(request) val nodes = request.getNodes val hostStr = if (nodes == null || nodes.isEmpty) "Any" else nodes.last @@ -230,6 +248,17 @@ private[yarn] class YarnAllocator( } } + /** + * Creates a container request, handling the reflection required to use YARN features that were + * added in recent versions. + */ + private def createContainerRequest(resource: Resource): ContainerRequest = { + nodeLabelConstructor.map { constructor => + constructor.newInstance(resource, null, null, RM_REQUEST_PRIORITY, true: java.lang.Boolean, + labelExpression.orNull) + }.getOrElse(new ContainerRequest(resource, null, null, RM_REQUEST_PRIORITY)) + } + /** * Handle containers granted by the RM by launching executors on them. * From 7ce2a33c3acffa17ae32f48ebb40b69b9b36dae4 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 11 May 2015 13:35:06 -0700 Subject: [PATCH 10/59] [SPARK-7508] JettyUtils-generated servlets to log & report all errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch for SPARK-7508 This logs warn then generates a response which include the message body and stack trace as text/plain, no-cache. The exit code is 500. In practise (in some tests in SPARK-1537 to be precise), jetty is getting in between this servlet and the web response the user sees —the body of the response is lost for any error response (500, even 404 and bad request). The standard Jetty handlers must be getting in the way. This patch doesn't address that, it ensures that 1. if the jetty handlers were put to one side the users would see the errors 2. at least the exceptions appear in the server-side logs. This is better to users saying "I saw a 500 error" and you not having anything in the logs to see what went wrong. 
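Stripped of Spark specifics, the pattern being added is roughly the following (a hedged sketch only; the servlet and logger wiring here are illustrative and not the actual JettyUtils code):

import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse}
import org.slf4j.LoggerFactory

class LoggingServlet(render: HttpServletRequest => String) extends HttpServlet {
  private val log = LoggerFactory.getLogger(getClass)

  override def doGet(request: HttpServletRequest, response: HttpServletResponse): Unit = {
    try {
      response.setStatus(HttpServletResponse.SC_OK)
      response.getWriter.print(render(request))
    } catch {
      case e: IllegalArgumentException =>
        // bad request parameters: report a 400 with the message
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage)
      case e: Exception =>
        // log server-side first, then rethrow so the container's error handler
        // can turn the failure into a 500 that includes the stack trace
        log.warn(s"GET ${request.getRequestURI} failed: $e", e)
        throw e
    }
  }
}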
Author: Steve Loughran Closes #6033 from steveloughran/stevel/feature/SPARK-7508-JettyUtils and squashes the following commits: 584836f [Steve Loughran] SPARK-7508 drop trailing semicolon ad6f185 [Steve Loughran] SPARK-7508: jetty handles exception reporting itself; spark just sets this up and logs exceptions before being relayed 258d9f9 [Steve Loughran] SPARK-7508 fix typo manually-edited before patch pushed 69c8263 [Steve Loughran] SPARK-7508 JettyUtils-generated servlets to log & report all errors --- core/src/main/scala/org/apache/spark/ui/JettyUtils.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index dfd6fdb5e9993..06e616220c706 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -78,6 +78,9 @@ private[spark] object JettyUtils extends Logging { } catch { case e: IllegalArgumentException => response.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage) + case e: Exception => + logWarning(s"GET ${request.getRequestURI} failed: $e", e) + throw e } } // SPARK-5983 ensure TRACE is not supported @@ -217,6 +220,9 @@ private[spark] object JettyUtils extends Logging { val pool = new QueuedThreadPool pool.setDaemon(true) server.setThreadPool(pool) + val errorHandler = new ErrorHandler() + errorHandler.setShowStacks(true) + server.addBean(errorHandler) server.setHandler(collection) try { server.start() From 6e9910c21ada19ae14122b5f14a6a30845b98229 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 11 May 2015 14:19:11 -0700 Subject: [PATCH 11/59] [SPARK-7515] [DOC] Update documentation for PySpark on YARN with cluster mode Now PySpark on YARN with cluster mode is supported so let's update doc. Author: Kousuke Saruta Closes #6040 from sarutak/update-doc-for-pyspark-on-yarn and squashes the following commits: ad9f88c [Kousuke Saruta] Brushed up sentences 469fd2e [Kousuke Saruta] Merge branch 'master' of https://github.com/apache/spark into update-doc-for-pyspark-on-yarn fcfdb92 [Kousuke Saruta] Updated doc for PySpark on YARN with cluster mode --- docs/submitting-applications.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md index 3ecbf2308cd44..e58645274e525 100644 --- a/docs/submitting-applications.md +++ b/docs/submitting-applications.md @@ -59,7 +59,7 @@ for applications that involve the REPL (e.g. Spark shell). Alternatively, if your application is submitted from a machine far from the worker machines (e.g. locally on your laptop), it is common to use `cluster` mode to minimize network latency between the drivers and the executors. Note that `cluster` mode is currently not supported for -Mesos clusters or Python applications. +Mesos clusters. Currently only YARN supports cluster mode for Python applications. For Python applications, simply pass a `.py` file in the place of `` instead of a JAR, and add Python `.zip`, `.egg` or `.py` files to the search path with `--py-files`. 
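For illustration, a hedged Scala sketch of submitting such a Python application in cluster mode programmatically, using `org.apache.spark.launcher.SparkLauncher`. The builder methods and file paths shown are assumptions for the example and are not part of this patch; the `.py` file takes the place of the application JAR, and extra Python files are added via `addPyFile`.

```scala
// Sketch only: assumes the SparkLauncher builder methods shown here and hypothetical paths.
import org.apache.spark.launcher.SparkLauncher

object SubmitPythonYarnCluster {
  def main(args: Array[String]): Unit = {
    val submit = new SparkLauncher()
      .setAppResource("/path/to/main.py")        // the .py file replaces <application-jar>
      .addPyFile("/path/to/dependencies.zip")    // extra .zip/.egg/.py files for the Python path
      .setMaster("yarn-cluster")                 // YARN is the only manager supporting Python cluster mode
      .launch()                                  // starts a spark-submit child process
    submit.waitFor()                             // block until the submission process exits
  }
}
```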
From 8e674331d9ce98068b44e4d483b6d35cef0648fa Mon Sep 17 00:00:00 2001 From: gchen Date: Mon, 11 May 2015 14:37:18 -0700 Subject: [PATCH 12/59] [SPARK-7516] [Minor] [DOC] Replace depreciated inferSchema() with createDataFrame() JIRA: https://issues.apache.org/jira/browse/SPARK-7516 In sql-programming-guide, deprecated python data frame api inferSchema() should be replaced by createDataFrame(): schemaPeople = sqlContext.inferSchema(people) -> schemaPeople = sqlContext.createDataFrame(people) Author: gchen Closes #6041 from gchen/python-docs and squashes the following commits: c27eb7c [gchen] replace inferSchema() with createDataFrame() --- docs/sql-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index df4c123bdd86c..6af10432b9d03 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -470,7 +470,7 @@ parts = lines.map(lambda l: l.split(",")) people = parts.map(lambda p: Row(name=p[0], age=int(p[1]))) # Infer the schema, and register the DataFrame as a table. -schemaPeople = sqlContext.inferSchema(people) +schemaPeople = sqlContext.createDataFrame(people) schemaPeople.registerTempTable("people") # SQL can be run over DataFrames that have been registered as a table. From 25c01c54840a9ab768f8b917de7edc2bc2d61b9e Mon Sep 17 00:00:00 2001 From: jerryshao Date: Mon, 11 May 2015 14:38:58 -0700 Subject: [PATCH 13/59] [STREAMING] [MINOR] Close files correctly when iterator is finished in streaming WAL recovery Currently there's no chance to close the file correctly after the iteration is finished, change to `CompletionIterator` to avoid resource leakage. Author: jerryshao Closes #6050 from jerryshao/close-file-correctly and squashes the following commits: 52dfaf5 [jerryshao] Close files correctly when iterator is finished --- .../apache/spark/streaming/util/FileBasedWriteAheadLog.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala index 9985fedc35141..87ba4f84a9ceb 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala @@ -26,7 +26,7 @@ import scala.language.postfixOps import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.{CompletionIterator, ThreadUtils} import org.apache.spark.{Logging, SparkConf} /** @@ -124,7 +124,8 @@ private[streaming] class FileBasedWriteAheadLog( logFilesToRead.iterator.map { file => logDebug(s"Creating log reader with $file") - new FileBasedWriteAheadLogReader(file, hadoopConf) + val reader = new FileBasedWriteAheadLogReader(file, hadoopConf) + CompletionIterator[ByteBuffer, Iterator[ByteBuffer]](reader, reader.close _) } flatMap { x => x } } From a8ea09683acc071cd81b244e8d2b7d9638b1aced Mon Sep 17 00:00:00 2001 From: LCY Vincent Date: Mon, 11 May 2015 14:48:10 -0700 Subject: [PATCH 14/59] Update Documentation: leftsemi instead of semijoin should sync up with here? 
https://github.com/apache/spark/blob/119f45d61d7b48d376cca05e1b4f0c7fcf65bfa8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala#L26 Author: LCY Vincent Closes #5944 from vincentlaucy/master and squashes the following commits: fc0e454 [LCY Vincent] Update DataFrame.scala --- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 7947042c14299..c92ca607fbd54 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -433,7 +433,7 @@ class DataFrame private[sql]( * * @param right Right side of the join. * @param joinExprs Join expression. - * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `semijoin`. + * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. * @group dfops */ def join(right: DataFrame, joinExprs: Column, joinType: String): DataFrame = { From 91dc3dfd77126afbb894a783b7201b87b59f73cb Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Tue, 12 May 2015 00:25:43 +0100 Subject: [PATCH 15/59] [MINOR] [DOCS] Fix the link to test building info on the wiki Author: Sean Owen Closes #6063 from srowen/FixRunningTestsLink and squashes the following commits: db62018 [Sean Owen] Fix the link to test building info on the wiki --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c3afc4db9c63c..9c09d40e2bdae 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ can be run using: ./dev/run-tests Please see the guidance on how to -[run all automated tests](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-AutomatedTesting). +[run tests for a module, or individual tests](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools). ## A Note About Hadoop Versions From 4f4dbb030c208caba18f314a1ef1751696627d26 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 11 May 2015 17:02:11 -0700 Subject: [PATCH 16/59] [SQL] Show better error messages for incorrect join types in DataFrames. As a follow-up to https://github.com/apache/spark/pull/5944 Author: Reynold Xin Closes #6064 from rxin/jointype-better-error and squashes the following commits: 7629bf7 [Reynold Xin] [SQL] Show better error messages for incorrect join types in DataFrames. --- .../apache/spark/sql/catalyst/plans/joinTypes.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala index 5dc0539caec24..77dec7ca6e2b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala @@ -24,6 +24,16 @@ object JoinType { case "leftouter" | "left" => LeftOuter case "rightouter" | "right" => RightOuter case "leftsemi" => LeftSemi + case _ => + val supported = Seq( + "inner", + "outer", "full", "fullouter", + "leftouter", "left", + "rightouter", "right", + "leftsemi") + + throw new IllegalArgumentException(s"Unsupported join type '$typ'. 
" + + "Supported join types include: " + supported.mkString("'", "', '", "'") + ".") } } From 57255dcd794222f4db5df1e549ebc7b896cebfdc Mon Sep 17 00:00:00 2001 From: madhukar Date: Mon, 11 May 2015 17:04:11 -0700 Subject: [PATCH 17/59] [SPARK-7084] improve saveAsTable documentation Author: madhukar Closes #5654 from phatak-dev/master and squashes the following commits: 386f407 [madhukar] #5654 updated for all the methods 2c997c5 [madhukar] Merge branch 'master' of https://github.com/apache/spark 00bc819 [madhukar] Merge branch 'master' of https://github.com/apache/spark 2a802c6 [madhukar] #5654 updated the doc according to comments 866e8df [madhukar] [SPARK-7084] improve saveAsTable documentation --- .../scala/org/apache/spark/sql/DataFrame.scala | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index c92ca607fbd54..729cfc1da25b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -1192,6 +1192,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental @@ -1208,6 +1211,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental @@ -1232,6 +1238,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental @@ -1248,6 +1257,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental @@ -1264,6 +1276,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental @@ -1285,6 +1300,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. 
Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental From 3a9b6997df3fef1052d8c410f32319018c52acff Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 11 May 2015 18:07:12 -0700 Subject: [PATCH 18/59] [SPARK-7462][SQL] Update documentation for retaining grouping columns in DataFrames. Author: Reynold Xin Closes #6062 from rxin/agg-retain-doc and squashes the following commits: 43e511e [Reynold Xin] [SPARK-7462][SQL] Update documentation for retaining grouping columns in DataFrames. --- docs/sql-programming-guide.md | 60 ++++++++++++++++++- python/pyspark/sql/_types.py | 2 + .../org/apache/spark/sql/GroupedData.scala | 14 ++++- 3 files changed, 73 insertions(+), 3 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 6af10432b9d03..6b7b867ea6b5d 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1594,6 +1594,64 @@ options. # Migration Guide +## Upgrading from Spark SQL 1.3 to 1.4 + +Based on user feedback, we changed the default behavior of `DataFrame.groupBy().agg()` to retain the grouping columns in the resulting `DataFrame`. To keep the behavior in 1.3, set `spark.sql.retainGroupColumns` to `false`. + +
    +
    +{% highlight scala %} + +// In 1.3.x, in order for the grouping column "department" to show up, +// it must be included explicitly as part of the agg function call. +df.groupBy("department").agg($"department", max("age"), sum("expense")) + +// In 1.4+, grouping column "department" is included automatically. +df.groupBy("department").agg(max("age"), sum("expense")) + +// Revert to 1.3 behavior (not retaining grouping column) by: +sqlContext.setConf("spark.sql.retainGroupColumns", "false") + +{% endhighlight %} +
    + +
    +{% highlight java %} + +// In 1.3.x, in order for the grouping column "department" to show up, +// it must be included explicitly as part of the agg function call. +df.groupBy("department").agg(col("department"), max("age"), sum("expense")); + +// In 1.4+, grouping column "department" is included automatically. +df.groupBy("department").agg(max("age"), sum("expense")); + +// Revert to 1.3 behavior (not retaining grouping column) by: +sqlContext.setConf("spark.sql.retainGroupColumns", "false"); + +{% endhighlight %} +
    + +
+{% highlight python %} + +import pyspark.sql.functions as func + +# In 1.3.x, in order for the grouping column "department" to show up, +# it must be included explicitly as part of the agg function call. +df.groupBy("department").agg(df["department"], func.max("age"), func.sum("expense")) + +# In 1.4+, grouping column "department" is included automatically. +df.groupBy("department").agg(func.max("age"), func.sum("expense")) + +# Revert to 1.3.x behavior (not retaining grouping column) by: +sqlContext.setConf("spark.sql.retainGroupColumns", "false") + +{% endhighlight %} +
    + +
    + + ## Upgrading from Spark SQL 1.0-1.2 to 1.3 In Spark 1.3 we removed the "Alpha" label from Spark SQL and as part of this did a cleanup of the @@ -1651,7 +1709,7 @@ moved into the udf object in `SQLContext`.
    -{% highlight java %} +{% highlight scala %} sqlContext.udf.register("strLen", (s: String) => s.length()) diff --git a/python/pyspark/sql/_types.py b/python/pyspark/sql/_types.py index fd98e116d2cf1..b96851a174d49 100644 --- a/python/pyspark/sql/_types.py +++ b/python/pyspark/sql/_types.py @@ -1228,12 +1228,14 @@ def __getattr__(self, item): raise AttributeError(item) def __reduce__(self): + """Returns a tuple so Python knows how to pickle Row.""" if hasattr(self, "__fields__"): return (_create_row, (self.__fields__, tuple(self))) else: return tuple.__reduce__(self) def __repr__(self): + """Printable representation of Row used in Python REPL.""" if hasattr(self, "__fields__"): return "Row(%s)" % ", ".join("%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 003a620dcc8ce..543320e471bf7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -146,11 +146,21 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * * // Scala: * import org.apache.spark.sql.functions._ - * df.groupBy("department").agg($"department", max($"age"), sum($"expense")) + * df.groupBy("department").agg(max("age"), sum("expense")) * * // Java: * import static org.apache.spark.sql.functions.*; - * df.groupBy("department").agg(col("department"), max(col("age")), sum(col("expense"))); + * df.groupBy("department").agg(max("age"), sum("expense")); + * }}} + * + * Note that before Spark 1.4, the default behavior is to NOT retain grouping columns. To change + * to that behavior, set config variable `spark.sql.retainGroupColumns` to `false`. + * {{{ + * // Scala, 1.3.x: + * df.groupBy("department").agg($"department", max("age"), sum("expense")) + * + * // Java, 1.3.x: + * df.groupBy("department").agg(col("department"), max("age"), sum("expense")); * }}} */ @scala.annotation.varargs From 87229c95c6b597f5b84e36d518b9830e3ba63424 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 11 May 2015 18:10:45 -0700 Subject: [PATCH 19/59] Updated DataFrame.saveAsTable Hive warning to include SPARK-7550 ticket. So users that are interested in this can track it easily. Author: Reynold Xin Closes #6067 from rxin/SPARK-7550 and squashes the following commits: ee0e34c [Reynold Xin] Updated DataFrame.saveAsTable Hive warning to include SPARK-7550 ticket. --- .../main/scala/org/apache/spark/sql/DataFrame.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 729cfc1da25b2..2472999de37f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -1194,7 +1194,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1213,7 +1213,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. 
+ * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1240,7 +1240,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1259,7 +1259,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1278,7 +1278,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1302,7 +1302,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental From 35fb42a0b01d3043b7d5e27256d1b45a08583aab Mon Sep 17 00:00:00 2001 From: Xusen Yin Date: Mon, 11 May 2015 18:41:22 -0700 Subject: [PATCH 20/59] [SPARK-5893] [ML] Add bucketizer JIRA issue [here](https://issues.apache.org/jira/browse/SPARK-5893). One thing to make clear, the `buckets` parameter, which is an array of `Double`, performs as split points. Say, ```scala buckets = Array(-0.5, 0.0, 0.5) ``` splits the real number into 4 ranges, (-inf, -0.5], (-0.5, 0.0], (0.0, 0.5], (0.5, +inf), which is encoded as 0, 1, 2, 3. Author: Xusen Yin Author: Joseph K. Bradley Closes #5980 from yinxusen/SPARK-5893 and squashes the following commits: dc8c843 [Xusen Yin] Merge pull request #4 from jkbradley/yinxusen-SPARK-5893 1ca973a [Joseph K. Bradley] one more bucketizer test 34f124a [Joseph K. Bradley] Removed lowerInclusive, upperInclusive params from Bucketizer, and used splits instead. 
eacfcfa [Xusen Yin] change ML attribute from splits into buckets c3cc770 [Xusen Yin] add more unit test for binary search 3a16cc2 [Xusen Yin] refine comments and names ac77859 [Xusen Yin] fix style error fb30d79 [Xusen Yin] fix and test binary search 2466322 [Xusen Yin] refactor Bucketizer 11fb00a [Xusen Yin] change it into an Estimator 998bc87 [Xusen Yin] check buckets 4024cf1 [Xusen Yin] add test suite 5fe190e [Xusen Yin] add bucketizer --- .../apache/spark/ml/feature/Bucketizer.scala | 131 ++++++++++++++++ .../apache/spark/ml/util/SchemaUtils.scala | 11 ++ .../spark/ml/feature/BucketizerSuite.scala | 148 ++++++++++++++++++ 3 files changed, 290 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala create mode 100644 mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala new file mode 100644 index 0000000000000..7dba64bc3506f --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.ml.attribute.NominalAttribute +import org.apache.spark.ml.param._ +import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} +import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{DoubleType, StructField, StructType} + +/** + * :: AlphaComponent :: + * `Bucketizer` maps a column of continuous features to a column of feature buckets. + */ +@AlphaComponent +final class Bucketizer private[ml] (override val parent: Estimator[Bucketizer]) + extends Model[Bucketizer] with HasInputCol with HasOutputCol { + + def this() = this(null) + + /** + * Parameter for mapping continuous features into buckets. With n splits, there are n+1 buckets. + * A bucket defined by splits x,y holds values in the range [x,y). Splits should be strictly + * increasing. Values at -inf, inf must be explicitly provided to cover all Double values; + * otherwise, values outside the splits specified will be treated as errors. + * @group param + */ + val splits: Param[Array[Double]] = new Param[Array[Double]](this, "splits", + "Split points for mapping continuous features into buckets. With n splits, there are n+1 " + + "buckets. A bucket defined by splits x,y holds values in the range [x,y). The splits " + + "should be strictly increasing. 
Values at -inf, inf must be explicitly provided to cover" + + " all Double values; otherwise, values outside the splits specified will be treated as" + + " errors.", + Bucketizer.checkSplits) + + /** @group getParam */ + def getSplits: Array[Double] = $(splits) + + /** @group setParam */ + def setSplits(value: Array[Double]): this.type = set(splits, value) + + /** @group setParam */ + def setInputCol(value: String): this.type = set(inputCol, value) + + /** @group setParam */ + def setOutputCol(value: String): this.type = set(outputCol, value) + + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema) + val bucketizer = udf { feature: Double => + Bucketizer.binarySearchForBuckets($(splits), feature) + } + val newCol = bucketizer(dataset($(inputCol))) + val newField = prepOutputField(dataset.schema) + dataset.withColumn($(outputCol), newCol.as($(outputCol), newField.metadata)) + } + + private def prepOutputField(schema: StructType): StructField = { + val buckets = $(splits).sliding(2).map(bucket => bucket.mkString(", ")).toArray + val attr = new NominalAttribute(name = Some($(outputCol)), isOrdinal = Some(true), + values = Some(buckets)) + attr.toStructField() + } + + override def transformSchema(schema: StructType): StructType = { + SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) + SchemaUtils.appendColumn(schema, prepOutputField(schema)) + } +} + +private[feature] object Bucketizer { + /** We require splits to be of length >= 3 and to be in strictly increasing order. */ + def checkSplits(splits: Array[Double]): Boolean = { + if (splits.length < 3) { + false + } else { + var i = 0 + while (i < splits.length - 1) { + if (splits(i) >= splits(i + 1)) return false + i += 1 + } + true + } + } + + /** + * Binary searching in several buckets to place each data point. + * @throws RuntimeException if a feature is < splits.head or >= splits.last + */ + def binarySearchForBuckets( + splits: Array[Double], + feature: Double): Double = { + // Check bounds. We make an exception for +inf so that it can exist in some bin. + if ((feature < splits.head) || (feature >= splits.last && feature != Double.PositiveInfinity)) { + throw new RuntimeException(s"Feature value $feature out of Bucketizer bounds" + + s" [${splits.head}, ${splits.last}). Check your features, or loosen " + + s"the lower/upper bound constraints.") + } + var left = 0 + var right = splits.length - 2 + while (left < right) { + val mid = (left + right) / 2 + val split = splits(mid + 1) + if (feature < split) { + right = mid + } else { + left = mid + 1 + } + } + left + } +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala index 0383bf0b382b7..11592b77eb356 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala @@ -58,4 +58,15 @@ object SchemaUtils { val outputFields = schema.fields :+ StructField(colName, dataType, nullable = false) StructType(outputFields) } + + /** + * Appends a new column to the input schema. This fails if the given output column already exists. 
+ * @param schema input schema + * @param col New column schema + * @return new schema with the input column appended + */ + def appendColumn(schema: StructType, col: StructField): StructType = { + require(!schema.fieldNames.contains(col.name), s"Column ${col.name} already exists.") + StructType(schema.fields :+ col) + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala new file mode 100644 index 0000000000000..acb46c0a35709 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import scala.util.Random + +import org.scalatest.FunSuite + +import org.apache.spark.SparkException +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.sql.{DataFrame, Row, SQLContext} + +class BucketizerSuite extends FunSuite with MLlibTestSparkContext { + + @transient private var sqlContext: SQLContext = _ + + override def beforeAll(): Unit = { + super.beforeAll() + sqlContext = new SQLContext(sc) + } + + test("Bucket continuous features, without -inf,inf") { + // Check a set of valid feature values. + val splits = Array(-0.5, 0.0, 0.5) + val validData = Array(-0.5, -0.3, 0.0, 0.2) + val expectedBuckets = Array(0.0, 0.0, 1.0, 1.0) + val dataFrame: DataFrame = + sqlContext.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected") + + val bucketizer: Bucketizer = new Bucketizer() + .setInputCol("feature") + .setOutputCol("result") + .setSplits(splits) + + bucketizer.transform(dataFrame).select("result", "expected").collect().foreach { + case Row(x: Double, y: Double) => + assert(x === y, + s"The feature value is not correct after bucketing. Expected $y but found $x") + } + + // Check for exceptions when using a set of invalid feature values. 
+ val invalidData1: Array[Double] = Array(-0.9) ++ validData + val invalidData2 = Array(0.5) ++ validData + val badDF1 = sqlContext.createDataFrame(invalidData1.zipWithIndex).toDF("feature", "idx") + intercept[RuntimeException]{ + bucketizer.transform(badDF1).collect() + println("Invalid feature value -0.9 was not caught as an invalid feature!") + } + val badDF2 = sqlContext.createDataFrame(invalidData2.zipWithIndex).toDF("feature", "idx") + intercept[RuntimeException]{ + bucketizer.transform(badDF2).collect() + println("Invalid feature value 0.5 was not caught as an invalid feature!") + } + } + + test("Bucket continuous features, with -inf,inf") { + val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity) + val validData = Array(-0.9, -0.5, -0.3, 0.0, 0.2, 0.5, 0.9) + val expectedBuckets = Array(0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0) + val dataFrame: DataFrame = + sqlContext.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected") + + val bucketizer: Bucketizer = new Bucketizer() + .setInputCol("feature") + .setOutputCol("result") + .setSplits(splits) + + bucketizer.transform(dataFrame).select("result", "expected").collect().foreach { + case Row(x: Double, y: Double) => + assert(x === y, + s"The feature value is not correct after bucketing. Expected $y but found $x") + } + } + + test("Binary search correctness on hand-picked examples") { + import BucketizerSuite.checkBinarySearch + // length 3, with -inf + checkBinarySearch(Array(Double.NegativeInfinity, 0.0, 1.0)) + // length 4 + checkBinarySearch(Array(-1.0, -0.5, 0.0, 1.0)) + // length 5 + checkBinarySearch(Array(-1.0, -0.5, 0.0, 1.0, 1.5)) + // length 3, with inf + checkBinarySearch(Array(0.0, 1.0, Double.PositiveInfinity)) + // length 3, with -inf and inf + checkBinarySearch(Array(Double.NegativeInfinity, 1.0, Double.PositiveInfinity)) + // length 4, with -inf and inf + checkBinarySearch(Array(Double.NegativeInfinity, 0.0, 1.0, Double.PositiveInfinity)) + } + + test("Binary search correctness in contrast with linear search, on random data") { + val data = Array.fill(100)(Random.nextDouble()) + val splits: Array[Double] = Double.NegativeInfinity +: + Array.fill(10)(Random.nextDouble()).sorted :+ Double.PositiveInfinity + val bsResult = Vectors.dense(data.map(x => Bucketizer.binarySearchForBuckets(splits, x))) + val lsResult = Vectors.dense(data.map(x => BucketizerSuite.linearSearchForBuckets(splits, x))) + assert(bsResult ~== lsResult absTol 1e-5) + } +} + +private object BucketizerSuite extends FunSuite { + /** Brute force search for buckets. Bucket i is defined by the range [split(i), split(i+1)). */ + def linearSearchForBuckets(splits: Array[Double], feature: Double): Double = { + require(feature >= splits.head) + var i = 0 + while (i < splits.length - 1) { + if (feature < splits(i + 1)) return i + i += 1 + } + throw new RuntimeException( + s"linearSearchForBuckets failed to find bucket for feature value $feature") + } + + /** Check all values in splits, plus values between all splits. */ + def checkBinarySearch(splits: Array[Double]): Unit = { + def testFeature(feature: Double, expectedBucket: Double): Unit = { + assert(Bucketizer.binarySearchForBuckets(splits, feature) === expectedBucket, + s"Expected feature value $feature to be in bucket $expectedBucket with splits:" + + s" ${splits.mkString(", ")}") + } + var i = 0 + while (i < splits.length - 1) { + testFeature(splits(i), i) // Split i should fall in bucket i. 
+ testFeature((splits(i) + splits(i + 1)) / 2, i) // Value between splits i,i+1 should be in i. + i += 1 + } + if (splits.last === Double.PositiveInfinity) { + testFeature(Double.PositiveInfinity, splits.length - 2) + } + } +} From f9c7580adadce75a94bd2854cf4f743d8cbd1d23 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Mon, 11 May 2015 18:53:50 -0700 Subject: [PATCH 21/59] [SPARK-7530] [STREAMING] Added StreamingContext.getState() to expose the current state of the context Author: Tathagata Das Closes #6058 from tdas/SPARK-7530 and squashes the following commits: 80ee0e6 [Tathagata Das] STARTED --> ACTIVE 3da6547 [Tathagata Das] Added synchronized dd88444 [Tathagata Das] Added more docs e1a8505 [Tathagata Das] Fixed comment length 89f9980 [Tathagata Das] Change to Java enum and added Java test 7c57351 [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7530 dd4e702 [Tathagata Das] Addressed comments. 3d56106 [Tathagata Das] Added Mima excludes 2b86ba1 [Tathagata Das] Added scala docs. 1722433 [Tathagata Das] Fixed style 976b094 [Tathagata Das] Added license 0585130 [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7530 e0f0a05 [Tathagata Das] Added getState and exposed StreamingContextState --- project/MimaExcludes.scala | 4 + .../spark/streaming/StreamingContext.scala | 75 +++++++++++-------- .../streaming/StreamingContextState.java | 45 +++++++++++ .../api/java/JavaStreamingContext.scala | 22 ++++++ .../apache/spark/streaming/JavaAPISuite.java | 14 ++++ .../spark/streaming/JavaTestUtils.scala | 1 + .../streaming/StreamingContextSuite.scala | 19 ++++- 7 files changed, 147 insertions(+), 33 deletions(-) create mode 100644 streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index cfe387faec14b..ad3d8426bdedd 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -106,6 +106,10 @@ object MimaExcludes { "org.apache.spark.sql.parquet.ParquetTestData$"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.TestGroupWriteSupport") + ) ++ Seq( + // SPARK-7530 Added StreamingContext.getState() + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.streaming.StreamingContext.state_=") ) case v if v.startsWith("1.3") => diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 5abe1367752d9..2c5834defa8c8 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -32,10 +32,11 @@ import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.spark._ -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.input.FixedLengthBinaryInputFormat import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.StreamingContextState._ import org.apache.spark.streaming.dstream._ import org.apache.spark.streaming.receiver.{ActorReceiver, ActorSupervisorStrategy, Receiver} import org.apache.spark.streaming.scheduler.{JobScheduler, StreamingListener} @@ -195,14 +196,7 @@ class StreamingContext private[streaming] ( assert(env.metricsSystem != null) 
env.metricsSystem.registerSource(streamingSource) - /** Enumeration to identify current state of the StreamingContext */ - private[streaming] object StreamingContextState extends Enumeration { - type CheckpointState = Value - val Initialized, Started, Stopped = Value - } - - import StreamingContextState._ - private[streaming] var state = Initialized + private var state: StreamingContextState = INITIALIZED private val startSite = new AtomicReference[CallSite](null) @@ -516,6 +510,21 @@ class StreamingContext private[streaming] ( ) } + /** + * :: DeveloperApi :: + * + * Return the current state of the context. The context can be in three possible states - + * - StreamingContextState.INTIALIZED - The context has been created, but not been started yet. + * Input DStreams, transformations and output operations can be created on the context. + * - StreamingContextState.ACTIVE - The context has been started, and been not stopped. + * Input DStreams, transformations and output operations cannot be created on the context. + * - StreamingContextState.STOPPED - The context has been stopped and cannot be used any more. + */ + @DeveloperApi + def getState(): StreamingContextState = synchronized { + state + } + /** * Start the execution of the streams. * @@ -523,11 +532,13 @@ class StreamingContext private[streaming] ( */ def start(): Unit = synchronized { import StreamingContext._ - if (state == Started) { - throw new SparkException("StreamingContext has already been started") - } - if (state == Stopped) { - throw new SparkException("StreamingContext has already been stopped") + state match { + case INITIALIZED => + // good to start + case ACTIVE => + throw new SparkException("StreamingContext has already been started") + case STOPPED => + throw new SparkException("StreamingContext has already been stopped") } validate() startSite.set(DStream.getCreationSite()) @@ -536,7 +547,7 @@ class StreamingContext private[streaming] ( assertNoOtherContextIsActive() scheduler.start() uiTab.foreach(_.attach()) - state = Started + state = StreamingContextState.ACTIVE setActiveContext(this) } } @@ -598,22 +609,26 @@ class StreamingContext private[streaming] ( * received data to be completed */ def stop(stopSparkContext: Boolean, stopGracefully: Boolean): Unit = synchronized { - state match { - case Initialized => logWarning("StreamingContext has not been started yet") - case Stopped => logWarning("StreamingContext has already been stopped") - case Started => - scheduler.stop(stopGracefully) - logInfo("StreamingContext stopped successfully") - waiter.notifyStop() + try { + state match { + case INITIALIZED => + logWarning("StreamingContext has not been started yet") + case STOPPED => + logWarning("StreamingContext has already been stopped") + case ACTIVE => + scheduler.stop(stopGracefully) + uiTab.foreach(_.detach()) + StreamingContext.setActiveContext(null) + waiter.notifyStop() + logInfo("StreamingContext stopped successfully") + } + // Even if we have already stopped, we still need to attempt to stop the SparkContext because + // a user might stop(stopSparkContext = false) and then call stop(stopSparkContext = true). + if (stopSparkContext) sc.stop() + } finally { + // The state should always be Stopped after calling `stop()`, even if we haven't started yet + state = STOPPED } - // Even if the streaming context has not been started, we still need to stop the SparkContext. 
- // Even if we have already stopped, we still need to attempt to stop the SparkContext because - // a user might stop(stopSparkContext = false) and then call stop(stopSparkContext = true). - if (stopSparkContext) sc.stop() - uiTab.foreach(_.detach()) - // The state should always be Stopped after calling `stop()`, even if we haven't started yet: - state = Stopped - StreamingContext.setActiveContext(null) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java new file mode 100644 index 0000000000000..d7b639383ee34 --- /dev/null +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming; + +import org.apache.spark.annotation.DeveloperApi; + +/** + * :: DeveloperApi :: + * + * Represents the state of a StreamingContext. + */ +@DeveloperApi +public enum StreamingContextState { + /** + * The context has been created, but not been started yet. + * Input DStreams, transformations and output operations can be created on the context. + */ + INITIALIZED, + + /** + * The context has been started, and been not stopped. + * Input DStreams, transformations and output operations cannot be created on the context. + */ + ACTIVE, + + /** + * The context has been stopped and cannot be used any more. + */ + STOPPED +} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index 572d7d8e8753d..d8fbed2c50644 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -578,6 +578,28 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { ssc.addStreamingListener(streamingListener) } + /** + * :: DeveloperApi :: + * + * Return the current state of the context. The context can be in three possible states - + *
      + *
+ * StreamingContextState.INITIALIZED - The context has been created, but not been started yet. + * Input DStreams, transformations and output operations can be created on the context. + *
+ *
+ * StreamingContextState.ACTIVE - The context has been started, and not been stopped. + * Input DStreams, transformations and output operations cannot be created on the context. + *
+ *
+ * StreamingContextState.STOPPED - The context has been stopped and cannot be used any more. + *
+ *
    + */ + def getState(): StreamingContextState = { + ssc.getState() + } + /** * Start the execution of the streams. */ diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java index b1adf881dd0f5..2e00b980b9e44 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java @@ -70,6 +70,20 @@ public void testInitialization() { Assert.assertNotNull(ssc.sparkContext()); } + @SuppressWarnings("unchecked") + @Test + public void testContextState() { + List> inputData = Arrays.asList(Arrays.asList(1, 2, 3, 4)); + Assert.assertTrue(ssc.getState() == StreamingContextState.INITIALIZED); + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaTestUtils.attachTestOutputStream(stream); + Assert.assertTrue(ssc.getState() == StreamingContextState.INITIALIZED); + ssc.start(); + Assert.assertTrue(ssc.getState() == StreamingContextState.ACTIVE); + ssc.stop(); + Assert.assertTrue(ssc.getState() == StreamingContextState.STOPPED); + } + @SuppressWarnings("unchecked") @Test public void testCount() { diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala index c0ea0491c313d..bb80bff6dc2e6 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala @@ -70,6 +70,7 @@ trait JavaTestBase extends TestSuiteBase { ssc: JavaStreamingContext, numBatches: Int, numExpectedOutput: Int): JList[JList[V]] = { implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]] + ssc.getState() val res = runStreams[V](ssc.ssc, numBatches, numExpectedOutput) val out = new ArrayList[JList[V]]() res.map(entry => out.append(new ArrayList[V](entry))) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 11c7fd835bfcd..b8247db7e8950 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -109,15 +109,21 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w assert(ssc.conf.getTimeAsSeconds("spark.cleaner.ttl", "-1") === 10) } + test("state matching") { + import StreamingContextState._ + assert(INITIALIZED === INITIALIZED) + assert(INITIALIZED != ACTIVE) + } + test("start and stop state check") { ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() - assert(ssc.state === ssc.StreamingContextState.Initialized) + assert(ssc.getState() === StreamingContextState.INITIALIZED) ssc.start() - assert(ssc.state === ssc.StreamingContextState.Started) + assert(ssc.getState() === StreamingContextState.ACTIVE) ssc.stop() - assert(ssc.state === ssc.StreamingContextState.Stopped) + assert(ssc.getState() === StreamingContextState.STOPPED) // Make sure that the SparkContext is also stopped by default intercept[Exception] { @@ -129,9 +135,11 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() ssc.start() + assert(ssc.getState() === StreamingContextState.ACTIVE) intercept[SparkException] { 
ssc.start() } + assert(ssc.getState() === StreamingContextState.ACTIVE) } test("stop multiple times") { @@ -139,13 +147,16 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w addInputStream(ssc).register() ssc.start() ssc.stop() + assert(ssc.getState() === StreamingContextState.STOPPED) ssc.stop() + assert(ssc.getState() === StreamingContextState.STOPPED) } test("stop before start") { ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() ssc.stop() // stop before start should not throw exception + assert(ssc.getState() === StreamingContextState.STOPPED) } test("start after stop") { @@ -156,6 +167,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w intercept[SparkException] { ssc.start() // start after stop should throw exception } + assert(ssc.getState() === StreamingContextState.STOPPED) } test("stop only streaming context") { @@ -167,6 +179,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w addInputStream(ssc).register() ssc.start() ssc.stop(stopSparkContext = false) + assert(ssc.getState() === StreamingContextState.STOPPED) assert(sc.makeRDD(1 to 100).collect().size === 100) sc.stop() From b6bf4f76c78abfaafa99b3c3c08b498aa9644346 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 11 May 2015 19:15:14 -0700 Subject: [PATCH 22/59] [SPARK-7324] [SQL] DataFrame.dropDuplicates This should also close https://github.com/apache/spark/pull/5870 Author: Reynold Xin Closes #6066 from rxin/dropDups and squashes the following commits: 130692f [Reynold Xin] [SPARK-7324][SQL] DataFrame.dropDuplicates --- python/pyspark/sql/dataframe.py | 36 +++++++++++++++++- .../org/apache/spark/sql/DataFrame.scala | 38 ++++++++++++++++++- .../org/apache/spark/sql/DataFrameSuite.scala | 35 +++++++++++++++++ 3 files changed, 105 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index c2fa6c87384a6..4eaa8d9c572ff 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -755,8 +755,6 @@ def groupBy(self, *cols): jdf = self._jdf.groupBy(self._jcols(*cols)) return GroupedData(jdf, self.sql_ctx) - groupby = groupBy - def agg(self, *exprs): """ Aggregate on the entire :class:`DataFrame` without groups (shorthand for ``df.groupBy.agg()``). @@ -793,6 +791,36 @@ def subtract(self, other): """ return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sql_ctx) + def dropDuplicates(self, subset=None): + """Return a new :class:`DataFrame` with duplicate rows removed, + optionally only considering certain columns. + + >>> from pyspark.sql import Row + >>> df = sc.parallelize([ \ + Row(name='Alice', age=5, height=80), \ + Row(name='Alice', age=5, height=80), \ + Row(name='Alice', age=10, height=80)]).toDF() + >>> df.dropDuplicates().show() + +---+------+-----+ + |age|height| name| + +---+------+-----+ + | 5| 80|Alice| + | 10| 80|Alice| + +---+------+-----+ + + >>> df.dropDuplicates(['name', 'height']).show() + +---+------+-----+ + |age|height| name| + +---+------+-----+ + | 5| 80|Alice| + +---+------+-----+ + """ + if subset is None: + jdf = self._jdf.dropDuplicates() + else: + jdf = self._jdf.dropDuplicates(self._jseq(subset)) + return DataFrame(jdf, self.sql_ctx) + def dropna(self, how='any', thresh=None, subset=None): """Returns a new :class:`DataFrame` omitting rows with null values. 
@@ -1012,6 +1040,10 @@ def toPandas(self): import pandas as pd return pd.DataFrame.from_records(self.collect(), columns=self.columns) + # Pandas compatibility + groupby = groupBy + drop_duplicates = dropDuplicates + # Having SchemaRDD for backward compatibility (for docs) class SchemaRDD(DataFrame): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 2472999de37f5..265a61592b943 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql import java.io.CharArrayWriter import java.sql.DriverManager - import scala.collection.JavaConversions._ import scala.language.implicitConversions import scala.reflect.ClassTag @@ -42,7 +41,7 @@ import org.apache.spark.sql.catalyst.plans.{JoinType, Inner} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.{EvaluatePython, ExplainCommand, LogicalRDD} import org.apache.spark.sql.jdbc.JDBCWriteDetails -import org.apache.spark.sql.json.{JacksonGenerator, JsonRDD} +import org.apache.spark.sql.json.JacksonGenerator import org.apache.spark.sql.types._ import org.apache.spark.sql.sources.{ResolvedDataSource, CreateTableUsingAsSelect} import org.apache.spark.util.Utils @@ -932,6 +931,40 @@ class DataFrame private[sql]( } } + /** + * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. + * This is an alias for `distinct`. + * @group dfops + */ + def dropDuplicates(): DataFrame = dropDuplicates(this.columns) + + /** + * (Scala-specific) Returns a new [[DataFrame]] with duplicate rows removed, considering only + * the subset of columns. + * + * @group dfops + */ + def dropDuplicates(colNames: Seq[String]): DataFrame = { + val groupCols = colNames.map(resolve) + val groupColExprIds = groupCols.map(_.exprId) + val aggCols = logicalPlan.output.map { attr => + if (groupColExprIds.contains(attr.exprId)) { + attr + } else { + Alias(First(attr), attr.name)() + } + } + Aggregate(groupCols, aggCols, logicalPlan) + } + + /** + * Returns a new [[DataFrame]] with duplicate rows removed, considering only + * the subset of columns. + * + * @group dfops + */ + def dropDuplicates(colNames: Array[String]): DataFrame = dropDuplicates(colNames.toSeq) + /** * Computes statistics for numeric columns, including count, mean, stddev, min, and max. * If no columns are given, this function computes statistics for all numerical columns. @@ -1089,6 +1122,7 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. + * This is an alias for `dropDuplicates`. 
* @group dfops */ override def distinct: DataFrame = Distinct(logicalPlan) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 7552c1288165f..2ade955864b71 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -457,4 +457,39 @@ class DataFrameSuite extends QueryTest { assert(complexData.filter(complexData("m")("1") === 1).count() == 1) assert(complexData.filter(complexData("s")("key") === 1).count() == 1) } + + test("SPARK-7324 dropDuplicates") { + val testData = TestSQLContext.sparkContext.parallelize( + (2, 1, 2) :: (1, 1, 1) :: + (1, 2, 1) :: (2, 1, 2) :: + (2, 2, 2) :: (2, 2, 1) :: + (2, 1, 1) :: (1, 1, 2) :: + (1, 2, 2) :: (1, 2, 1) :: Nil).toDF("key", "value1", "value2") + + checkAnswer( + testData.dropDuplicates(), + Seq(Row(2, 1, 2), Row(1, 1, 1), Row(1, 2, 1), + Row(2, 2, 2), Row(2, 1, 1), Row(2, 2, 1), + Row(1, 1, 2), Row(1, 2, 2))) + + checkAnswer( + testData.dropDuplicates(Seq("key", "value1")), + Seq(Row(2, 1, 2), Row(1, 2, 1), Row(1, 1, 1), Row(2, 2, 2))) + + checkAnswer( + testData.dropDuplicates(Seq("value1", "value2")), + Seq(Row(2, 1, 2), Row(1, 2, 1), Row(1, 1, 1), Row(2, 2, 2))) + + checkAnswer( + testData.dropDuplicates(Seq("key")), + Seq(Row(2, 1, 2), Row(1, 1, 1))) + + checkAnswer( + testData.dropDuplicates(Seq("value1")), + Seq(Row(2, 1, 2), Row(1, 2, 1))) + + checkAnswer( + testData.dropDuplicates(Seq("value2")), + Seq(Row(2, 1, 2), Row(1, 1, 1))) + } } From e35d878be3b2976333618a356b88440f5e8ba408 Mon Sep 17 00:00:00 2001 From: Cheng Hao Date: Mon, 11 May 2015 19:21:16 -0700 Subject: [PATCH 23/59] [SPARK-7411] [SQL] Support SerDe for HiveQl in CTAS This is a follow up of #5876 and should be merged after #5876. Let's wait for unit testing result from Jenkins. Author: Cheng Hao Closes #5963 from chenghao-intel/useIsolatedClient and squashes the following commits: f87ace6 [Cheng Hao] remove the TODO and add `resolved condition` for HiveTable a8260e8 [Cheng Hao] Update code as feedback f4e243f [Cheng Hao] remove the serde setting for SequenceFile d166afa [Cheng Hao] style issue d25a4aa [Cheng Hao] Add SerDe support for CTAS --- .../spark/sql/hive/HiveMetastoreCatalog.scala | 66 +++--- .../org/apache/spark/sql/hive/HiveQl.scala | 207 +++++++++++++++--- .../spark/sql/hive/client/ClientWrapper.scala | 6 + .../hive/execution/CreateTableAsSelect.scala | 12 +- .../apache/spark/sql/hive/HiveQlSuite.scala | 177 +++++++++++++++ .../sql/hive/execution/SQLQuerySuite.scala | 4 +- 6 files changed, 390 insertions(+), 82 deletions(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index f5398605bc7eb..bbf48efb24440 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -407,64 +407,58 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive * For example, because of a CREATE TABLE X AS statement. 
*/ object CreateTables extends Rule[LogicalPlan] { - import org.apache.hadoop.hive.ql.Context - import org.apache.hadoop.hive.ql.parse.{ASTNode, QB, SemanticAnalyzer} - def apply(plan: LogicalPlan): LogicalPlan = plan transform { // Wait until children are resolved. case p: LogicalPlan if !p.childrenResolved => p + case p: LogicalPlan if p.resolved => p + case p @ CreateTableAsSelect(table, child, allowExisting) => + val schema = if (table.schema.size > 0) { + table.schema + } else { + child.output.map { + attr => new HiveColumn( + attr.name, + HiveMetastoreTypes.toMetastoreType(attr.dataType), null) + } + } + + val desc = table.copy(schema = schema) - case CreateTableAsSelect(desc, child, allowExisting) => - if (hive.convertCTAS && !desc.serde.isDefined) { + if (hive.convertCTAS && table.serde.isEmpty) { // Do the conversion when spark.sql.hive.convertCTAS is true and the query // does not specify any storage format (file format and storage handler). - if (desc.specifiedDatabase.isDefined) { + if (table.specifiedDatabase.isDefined) { throw new AnalysisException( "Cannot specify database name in a CTAS statement " + - "when spark.sql.hive.convertCTAS is set to true.") + "when spark.sql.hive.convertCTAS is set to true.") } val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTableUsingAsSelect( desc.name, - conf.defaultDataSourceName, + hive.conf.defaultDataSourceName, temporary = false, mode, options = Map.empty[String, String], child ) } else { - execution.CreateTableAsSelect( - desc.copy( - specifiedDatabase = Option(desc.specifiedDatabase.getOrElse(client.currentDatabase))), - child, - allowExisting) - } - - case p: LogicalPlan if p.resolved => p - - case p @ CreateTableAsSelect(desc, child, allowExisting) => - val (dbName, tblName) = processDatabaseAndTableName(desc.database, desc.name) - - if (hive.convertCTAS) { - if (desc.specifiedDatabase.isDefined) { - throw new AnalysisException( - "Cannot specify database name in a CTAS statement " + - "when spark.sql.hive.convertCTAS is set to true.") + val desc = if (table.serde.isEmpty) { + // add default serde + table.copy( + serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + } else { + table } - val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists - CreateTableUsingAsSelect( - tblName, - conf.defaultDataSourceName, - temporary = false, - mode, - options = Map.empty[String, String], - child - ) - } else { + val (dbName, tblName) = + processDatabaseAndTableName( + desc.specifiedDatabase.getOrElse(client.currentDatabase), desc.name) + execution.CreateTableAsSelect( - desc, + desc.copy( + specifiedDatabase = Some(dbName), + name = tblName), child, allowExisting) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 04d40bbb2bced..2cbb5ca4d2e0c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -22,14 +22,15 @@ import java.sql.Date import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.ql.Context +import org.apache.hadoop.hive.serde.serdeConstants +import org.apache.hadoop.hive.ql.{ErrorMsg, Context} import org.apache.hadoop.hive.ql.exec.{FunctionRegistry, FunctionInfo} import org.apache.hadoop.hive.ql.lib.Node -import org.apache.hadoop.hive.ql.metadata.Table import org.apache.hadoop.hive.ql.parse._ import 
org.apache.hadoop.hive.ql.plan.PlanUtils -import org.apache.spark.sql.AnalysisException +import org.apache.hadoop.hive.ql.session.SessionState +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ @@ -62,7 +63,13 @@ case class CreateTableAsSelect( allowExisting: Boolean) extends UnaryNode with Command { override def output: Seq[Attribute] = Seq.empty[Attribute] - override lazy val resolved: Boolean = tableDesc.specifiedDatabase.isDefined && childrenResolved + override lazy val resolved: Boolean = + tableDesc.specifiedDatabase.isDefined && + tableDesc.schema.size > 0 && + tableDesc.serde.isDefined && + tableDesc.inputFormat.isDefined && + tableDesc.outputFormat.isDefined && + childrenResolved } /** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */ @@ -240,12 +247,23 @@ private[hive] object HiveQl { * Otherwise, there will be Null pointer exception, * when retrieving properties form HiveConf. */ - val hContext = new Context(new HiveConf()) + val hContext = new Context(hiveConf) val node = ParseUtils.findRootNonNullToken((new ParseDriver).parse(sql, hContext)) hContext.clear() node } + /** + * Returns the HiveConf + */ + private[this] def hiveConf(): HiveConf = { + val ss = SessionState.get() // SessionState is lazy initializaion, it can be null here + if (ss == null) { + new HiveConf() + } else { + ss.getConf + } + } /** Returns a LogicalPlan for a given HiveQL string. */ def parseSql(sql: String): LogicalPlan = hqlParser.parse(sql) @@ -476,8 +494,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C DropTable(tableName, ifExists.nonEmpty) // Support "ANALYZE TABLE tableNmae COMPUTE STATISTICS noscan" case Token("TOK_ANALYZE", - Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) :: - isNoscan) => + Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) :: + isNoscan) => // Reference: // https://cwiki.apache.org/confluence/display/Hive/StatsDev#StatsDev-ExistingTables if (partitionSpec.nonEmpty) { @@ -547,6 +565,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val ( Some(tableNameParts) :: _ /* likeTable */ :: + externalTable :: Some(query) :: allowExisting +: ignores) = @@ -554,6 +573,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C Seq( "TOK_TABNAME", "TOK_LIKETABLE", + "EXTERNAL", "TOK_QUERY", "TOK_IFNOTEXISTS", "TOK_TABLECOMMENT", @@ -576,43 +596,153 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C children) val (db, tableName) = extractDbNameTableName(tableNameParts) - var tableDesc = - HiveTable( - specifiedDatabase = db, - name = tableName, - schema = Seq.empty, - partitionColumns = Seq.empty, - properties = Map.empty, - serdeProperties = Map.empty, - tableType = ManagedTable, - location = None, - inputFormat = None, - outputFormat = None, - serde = None) - - // TODO: Handle all the cases here... 
- children.foreach { - case Token("TOK_TBLRCFILE", Nil) => - import org.apache.hadoop.hive.ql.io.{RCFileInputFormat, RCFileOutputFormat} + // TODO add bucket support + var tableDesc: HiveTable = HiveTable( + specifiedDatabase = db, + name = tableName, + schema = Seq.empty[HiveColumn], + partitionColumns = Seq.empty[HiveColumn], + properties = Map[String, String](), + serdeProperties = Map[String, String](), + tableType = if (externalTable.isDefined) ExternalTable else ManagedTable, + location = None, + inputFormat = None, + outputFormat = None, + serde = None, + viewText = None) + + // default storage type abbriviation (e.g. RCFile, ORC, PARQUET etc.) + val defaultStorageType = hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT) + // handle the default format for the storage type abbriviation + tableDesc = if ("SequenceFile".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = Option("org.apache.hadoop.mapred.SequenceFileInputFormat"), + outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat")) + } else if ("RCFile".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"), + serde = Option(hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE))) + } else if ("ORC".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"), + serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + } else if ("PARQUET".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), + serde = + Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + } else { + tableDesc.copy( + inputFormat = + Option("org.apache.hadoop.mapred.TextInputFormat"), + outputFormat = + Option("org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat")) + } + + children.collect { + case list @ Token("TOK_TABCOLLIST", _) => + val cols = BaseSemanticAnalyzer.getColumns(list, true) + if (cols != null) { + tableDesc = tableDesc.copy( + schema = cols.map { field => + HiveColumn(field.getName, field.getType, field.getComment) + }) + } + case Token("TOK_TABLECOMMENT", child :: Nil) => + val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + // TODO support the sql text + tableDesc = tableDesc.copy(viewText = Option(comment)) + case Token("TOK_TABLEPARTCOLS", list @ Token("TOK_TABCOLLIST", _) :: Nil) => + val cols = BaseSemanticAnalyzer.getColumns(list(0), false) + if (cols != null) { + tableDesc = tableDesc.copy( + partitionColumns = cols.map { field => + HiveColumn(field.getName, field.getType, field.getComment) + }) + } + case Token("TOK_TABLEROWFORMAT", Token("TOK_SERDEPROPS", child :: Nil) :: Nil)=> + val serdeParams = new java.util.HashMap[String, String]() + child match { + case Token("TOK_TABLEROWFORMATFIELD", rowChild1 :: rowChild2) => + val fieldDelim = BaseSemanticAnalyzer.unescapeSQLString (rowChild1.getText()) + serdeParams.put(serdeConstants.FIELD_DELIM, fieldDelim) + serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, fieldDelim) + if (rowChild2.length > 1) { + val fieldEscape = BaseSemanticAnalyzer.unescapeSQLString (rowChild2(0).getText) + 
serdeParams.put(serdeConstants.ESCAPE_CHAR, fieldEscape) + } + case Token("TOK_TABLEROWFORMATCOLLITEMS", rowChild :: Nil) => + val collItemDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + serdeParams.put(serdeConstants.COLLECTION_DELIM, collItemDelim) + case Token("TOK_TABLEROWFORMATMAPKEYS", rowChild :: Nil) => + val mapKeyDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + serdeParams.put(serdeConstants.MAPKEY_DELIM, mapKeyDelim) + case Token("TOK_TABLEROWFORMATLINES", rowChild :: Nil) => + val lineDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + if (!(lineDelim == "\n") && !(lineDelim == "10")) { + throw new AnalysisException( + SemanticAnalyzer.generateErrorMessage( + rowChild, + ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg)) + } + serdeParams.put(serdeConstants.LINE_DELIM, lineDelim) + case Token("TOK_TABLEROWFORMATNULL", rowChild :: Nil) => + val nullFormat = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + // TODO support the nullFormat + case _ => assert(false) + } + tableDesc = tableDesc.copy( + serdeProperties = tableDesc.serdeProperties ++ serdeParams) + case Token("TOK_TABLELOCATION", child :: Nil) => + var location = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + location = EximUtil.relativeToAbsolutePath(hiveConf, location) + tableDesc = tableDesc.copy(location = Option(location)) + case Token("TOK_TABLESERIALIZER", child :: Nil) => tableDesc = tableDesc.copy( - outputFormat = Option(classOf[RCFileOutputFormat].getName), - inputFormat = Option(classOf[RCFileInputFormat[_, _]].getName)) + serde = Option(BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText))) + if (child.getChildCount == 2) { + val serdeParams = new java.util.HashMap[String, String]() + BaseSemanticAnalyzer.readProps( + (child.getChild(1).getChild(0)).asInstanceOf[ASTNode], serdeParams) + tableDesc = tableDesc.copy(serdeProperties = tableDesc.serdeProperties ++ serdeParams) + } + case Token("TOK_FILEFORMAT_GENERIC", child :: Nil) => + throw new SemanticException( + "Unrecognized file format in STORED AS clause:${child.getText}") + case Token("TOK_TBLRCFILE", Nil) => + tableDesc = tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) if (tableDesc.serde.isEmpty) { tableDesc = tableDesc.copy( serde = Option("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) } + case Token("TOK_TBLORCFILE", Nil) => tableDesc = tableDesc.copy( inputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"), - outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"), - serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) + if (tableDesc.serde.isEmpty) { + tableDesc = tableDesc.copy( + serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + } case Token("TOK_TBLPARQUETFILE", Nil) => tableDesc = tableDesc.copy( - inputFormat = Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), - outputFormat = Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), - serde = Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + inputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat")) + if (tableDesc.serde.isEmpty) { + tableDesc = tableDesc.copy( + 
serde = Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + } case Token("TOK_TABLESERIALIZER", Token("TOK_SERDENAME", Token(serdeName, Nil) :: otherProps) :: Nil) => @@ -627,13 +757,20 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token("TOK_TABLEPROPERTIES", list :: Nil) => tableDesc = tableDesc.copy(properties = tableDesc.properties ++ getProperties(list)) - - case _ => + case list @ Token("TOK_TABLEFILEFORMAT", _) => + tableDesc = tableDesc.copy( + inputFormat = + Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)), + outputFormat = + Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(1).getText))) + case Token("TOK_STORAGEHANDLER", _) => + throw new AnalysisException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg()) + case _ => // Unsupport features } CreateTableAsSelect(tableDesc, nodeToPlan(query), allowExisting != None) - // If its not a "CREATE TABLE AS" like above then just pass it back to hive as a native command. + // If its not a "CTAS" like above then take it as a native command case Token("TOK_CREATETABLE", _) => NativePlaceholder // Support "TRUNCATE TABLE table_name [PARTITION partition_spec]" diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala index 6bca9d0179fe3..99aa0f1ded3f8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala @@ -225,6 +225,12 @@ private[hive] class ClientWrapper( table.partitionColumns.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) table.properties.foreach { case (k, v) => qlTable.setProperty(k, v) } table.serdeProperties.foreach { case (k, v) => qlTable.setSerdeParam(k, v) } + + // set owner + qlTable.setOwner(conf.getUser) + // set create time + qlTable.setCreateTime((System.currentTimeMillis() / 1000).asInstanceOf[Int]) + version match { case hive.v12 => table.location.map(new URI(_)).foreach(u => qlTable.call[URI, Unit]("setDataLocation", u)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala index 91e6ac4032204..7d3ec12c4eb05 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.hive.execution -import org.apache.hadoop.hive.ql.plan.CreateTableDesc - import org.apache.spark.annotation.Experimental -import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.{AnalysisException, SQLContext} import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.execution.RunnableCommand @@ -29,13 +27,10 @@ import org.apache.spark.sql.hive.{HiveContext, MetastoreRelation, HiveMetastoreT /** * Create table and insert the query result into it. - * @param database the database name of the new relation - * @param tableName the table name of the new relation + * @param tableDesc the Table Describe, which may contains serde, storage handler etc. 
* @param query the query whose result will be insert into the new relation * @param allowExisting allow continue working if it's already exists, otherwise * raise exception - * @param desc the CreateTableDesc, which may contains serde, storage handler etc. - */ private[hive] case class CreateTableAsSelect( @@ -80,8 +75,7 @@ case class CreateTableAsSelect( if (allowExisting) { // table already exists, will do nothing, to keep consistent with Hive } else { - throw - new org.apache.hadoop.hive.metastore.api.AlreadyExistsException(s"$database.$tableName") + throw new AnalysisException(s"$database.$tableName already exists.") } } else { hiveContext.executePlan(InsertIntoTable(metastoreRelation, Map(), query, true, false)).toRdd diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala new file mode 100644 index 0000000000000..941a2941649b8 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive + +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.ql.session.SessionState +import org.apache.hadoop.hive.serde.serdeConstants +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.hive.client.{ManagedTable, HiveColumn, ExternalTable, HiveTable} +import org.scalatest.{BeforeAndAfterAll, FunSuite} + + +class HiveQlSuite extends FunSuite with BeforeAndAfterAll { + override def beforeAll() { + if (SessionState.get() == null) { + SessionState.start(new HiveConf()) + } + } + + private def extractTableDesc(sql: String): (HiveTable, Boolean) = { + HiveQl.createPlan(sql).collect { + case CreateTableAsSelect(desc, child, allowExisting) => (desc, allowExisting) + }.head + } + + test("Test CTAS #1") { + val s1 = + """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |(viewTime INT, + |userid BIGINT, + |page_url STRING, + |referrer_url STRING, + |ip STRING COMMENT 'IP Address of the User', + |country STRING COMMENT 'country of origination') + |COMMENT 'This is the staging page view table' + |PARTITIONED BY (dt STRING COMMENT 'date type', hour STRING COMMENT 'hour of the day') + |ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' STORED AS RCFILE + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src""".stripMargin + + val (desc, exists) = extractTableDesc(s1) + assert(exists == true) + assert(desc.specifiedDatabase == Some("mydb")) + assert(desc.name == "page_view") + assert(desc.tableType == ExternalTable) + assert(desc.location == Some("/user/external/page_view")) + assert(desc.schema == + HiveColumn("viewtime", "int", null) :: + HiveColumn("userid", "bigint", null) :: + HiveColumn("page_url", "string", null) :: + HiveColumn("referrer_url", "string", null) :: + HiveColumn("ip", "string", "IP Address of the User") :: + HiveColumn("country", "string", "country of origination") :: Nil) + // TODO will be SQLText + assert(desc.viewText == Option("This is the staging page view table")) + assert(desc.partitionColumns == + HiveColumn("dt", "string", "date type") :: + HiveColumn("hour", "string", "hour of the day") :: Nil) + assert(desc.serdeProperties == + Map((serdeConstants.SERIALIZATION_FORMAT, "\054"), (serdeConstants.FIELD_DELIM, "\054"))) + assert(desc.inputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.outputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.serde == Option("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) + assert(desc.properties == Map(("p1", "v1"), ("p2", "v2"))) + } + + test("Test CTAS #2") { + val s2 = + """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |(viewTime INT, + |userid BIGINT, + |page_url STRING, + |referrer_url STRING, + |ip STRING COMMENT 'IP Address of the User', + |country STRING COMMENT 'country of origination') + |COMMENT 'This is the staging page view table' + |PARTITIONED BY (dt STRING COMMENT 'date type', hour STRING COMMENT 'hour of the day') + |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' + | STORED AS + | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' + | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src""".stripMargin + + val (desc, exists) = extractTableDesc(s2) + assert(exists == true) + assert(desc.specifiedDatabase == Some("mydb")) + assert(desc.name == "page_view") + assert(desc.tableType == 
ExternalTable) + assert(desc.location == Some("/user/external/page_view")) + assert(desc.schema == + HiveColumn("viewtime", "int", null) :: + HiveColumn("userid", "bigint", null) :: + HiveColumn("page_url", "string", null) :: + HiveColumn("referrer_url", "string", null) :: + HiveColumn("ip", "string", "IP Address of the User") :: + HiveColumn("country", "string", "country of origination") :: Nil) + // TODO will be SQLText + assert(desc.viewText == Option("This is the staging page view table")) + assert(desc.partitionColumns == + HiveColumn("dt", "string", "date type") :: + HiveColumn("hour", "string", "hour of the day") :: Nil) + assert(desc.serdeProperties == Map()) + assert(desc.inputFormat == Option("parquet.hive.DeprecatedParquetInputFormat")) + assert(desc.outputFormat == Option("parquet.hive.DeprecatedParquetOutputFormat")) + assert(desc.serde == Option("parquet.hive.serde.ParquetHiveSerDe")) + assert(desc.properties == Map(("p1", "v1"), ("p2", "v2"))) + } + + test("Test CTAS #3") { + val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" + val (desc, exists) = extractTableDesc(s3) + assert(exists == false) + assert(desc.specifiedDatabase == None) + assert(desc.name == "page_view") + assert(desc.tableType == ManagedTable) + assert(desc.location == None) + assert(desc.schema == Seq.empty[HiveColumn]) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.serdeProperties == Map()) + assert(desc.inputFormat == Option("org.apache.hadoop.mapred.TextInputFormat")) + assert(desc.outputFormat == Option("org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat")) + assert(desc.serde.isEmpty) + assert(desc.properties == Map()) + } + + test("Test CTAS #4") { + val s4 = + """CREATE TABLE page_view + |STORED BY 'storage.handler.class.name' AS SELECT * FROM src""".stripMargin + intercept[AnalysisException] { + extractTableDesc(s4) + } + } + + test("Test CTAS #5") { + val s5 = """CREATE TABLE ctas2 + | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" + | WITH SERDEPROPERTIES("serde_p1"="p1","serde_p2"="p2") + | STORED AS RCFile + | TBLPROPERTIES("tbl_p1"="p11", "tbl_p2"="p22") + | AS + | SELECT key, value + | FROM src + | ORDER BY key, value""".stripMargin + val (desc, exists) = extractTableDesc(s5) + assert(exists == false) + assert(desc.specifiedDatabase == None) + assert(desc.name == "ctas2") + assert(desc.tableType == ManagedTable) + assert(desc.location == None) + assert(desc.schema == Seq.empty[HiveColumn]) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.serdeProperties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2"))) + assert(desc.inputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.outputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.serde == Option("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) + assert(desc.properties == Map(("tbl_p1" -> "p11"), ("tbl_p2" -> "p22"))) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 1353802604402..0d739dead4c73 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -201,7 +201,7 @@ class SQLQuerySuite extends QueryTest { var message = intercept[AnalysisException] { sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value") }.getMessage 
- assert(message.contains("Table ctas1 already exists")) + assert(message.contains("ctas1 already exists")) checkRelation("ctas1", true) sql("DROP TABLE ctas1") @@ -314,7 +314,7 @@ class SQLQuerySuite extends QueryTest { SELECT key, value FROM src ORDER BY key, value""").collect().toSeq) - intercept[org.apache.hadoop.hive.metastore.api.AlreadyExistsException] { + intercept[AnalysisException] { sql( """CREATE TABLE ctas4 AS | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect() From 4b5e1fe94c65f0feb77d849e7defa42b6007628d Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Mon, 11 May 2015 19:22:44 -0700 Subject: [PATCH 24/59] [SPARK-7437] [SQL] Fold "literal in (item1, item2, ..., literal, ...)" into true or false directly SQL ``` select key from src where 3 in (4, 5); ``` Before ``` == Optimized Logical Plan == Project [key#12] Filter 3 INSET (5,4) MetastoreRelation default, src, None ``` After ``` == Optimized Logical Plan == LocalRelation [key#228], [] ``` Author: Zhongshuai Pei <799203320@qq.com> Author: DoingDone9 <799203320@qq.com> Closes #5972 from DoingDone9/InToFalse and squashes the following commits: 4c722a2 [Zhongshuai Pei] Update predicates.scala abe2bbb [Zhongshuai Pei] Update Optimizer.scala fa461a5 [Zhongshuai Pei] Update Optimizer.scala e34c28a [Zhongshuai Pei] Update predicates.scala 24739bd [Zhongshuai Pei] Update ConstantFoldingSuite.scala f4dbf50 [Zhongshuai Pei] Update ConstantFoldingSuite.scala 35ceb7a [Zhongshuai Pei] Update Optimizer.scala 36c194e [Zhongshuai Pei] Update Optimizer.scala 2e8f6ca [Zhongshuai Pei] Update Optimizer.scala 14952e2 [Zhongshuai Pei] Merge pull request #13 from apache/master f03fe7f [Zhongshuai Pei] Merge pull request #12 from apache/master f12fa50 [Zhongshuai Pei] Merge pull request #10 from apache/master f61210c [Zhongshuai Pei] Merge pull request #9 from apache/master 34b1a9a [Zhongshuai Pei] Merge pull request #8 from apache/master 802261c [DoingDone9] Merge pull request #7 from apache/master d00303b [DoingDone9] Merge pull request #6 from apache/master 98b134f [DoingDone9] Merge pull request #5 from apache/master 161cae3 [DoingDone9] Merge pull request #4 from apache/master c87e8b6 [DoingDone9] Merge pull request #3 from apache/master cb1852d [DoingDone9] Merge pull request #2 from apache/master c3f046f [DoingDone9] Merge pull request #1 from apache/master --- .../sql/catalyst/expressions/predicates.scala | 1 + .../sql/catalyst/optimizer/Optimizer.scala | 4 +-- .../optimizer/ConstantFoldingSuite.scala | 33 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 50b0f3ee5f93f..1d72a9eb834b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -110,6 +110,7 @@ case class InSet(value: Expression, hset: Set[Any]) override def children: Seq[Expression] = value :: Nil + override def foldable: Boolean = value.foldable override def nullable: Boolean = true // TODO: Figure out correct nullability semantics of IN. 
override def toString: String = s"$value INSET ${hset.mkString("(", ",", ")")}" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index d7b2f203a6934..1ee5fb245fbb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -46,13 +46,13 @@ object DefaultOptimizer extends Optimizer { CombineLimits) :: Batch("ConstantFolding", FixedPoint(100), NullPropagation, + OptimizeIn, ConstantFolding, LikeSimplification, BooleanSimplification, SimplifyFilters, SimplifyCasts, - SimplifyCaseConversionExpressions, - OptimizeIn) :: + SimplifyCaseConversionExpressions) :: Batch("Decimal Optimizations", FixedPoint(100), DecimalAggregates) :: Batch("LocalRelation", FixedPoint(100), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 6b7d9a85c341b..5697c2272b8e8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -35,6 +35,7 @@ class ConstantFoldingSuite extends PlanTest { Batch("AnalysisNodes", Once, EliminateSubQueries) :: Batch("ConstantFolding", Once, + OptimizeIn, ConstantFolding, BooleanSimplification) :: Nil } @@ -247,4 +248,36 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("Constant folding test: Fold In(v, list) into true or false") { + var originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), Literal(2)))) + + var optimized = Optimize.execute(originalQuery.analyze) + + var correctAnswer = + testRelation + .select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + + originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), 'a.attr))) + + optimized = Optimize.execute(originalQuery.analyze) + + correctAnswer = + testRelation + .select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + } } From 028ad4bd29106747089bb9a863e9a8dee738770e Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 11 May 2015 20:04:36 -0700 Subject: [PATCH 25/59] [SPARK-7509][SQL] DataFrame.drop in Python for dropping columns. Author: Reynold Xin Closes #6068 from rxin/drop-column and squashes the following commits: 9d7d5ec [Reynold Xin] [SPARK-7509][SQL] DataFrame.drop in Python for dropping columns. --- python/pyspark/sql/dataframe.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 4eaa8d9c572ff..72180f6d05fbc 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1014,7 +1014,7 @@ def withColumn(self, colName, col): @ignore_unicode_prefix def withColumnRenamed(self, existing, new): - """REturns a new :class:`DataFrame` by renaming an existing column. + """Returns a new :class:`DataFrame` by renaming an existing column. :param existing: string, name of the existing column to rename. :param col: string, new name of the column. 
@@ -1027,6 +1027,18 @@ def withColumnRenamed(self, existing, new): for c in self.columns] return self.select(*cols) + @ignore_unicode_prefix + def drop(self, colName): + """Returns a new :class:`DataFrame` that drops the specified column. + + :param colName: string, name of the column to drop. + + >>> df.drop('age').collect() + [Row(name=u'Alice'), Row(name=u'Bob')] + """ + jdf = self._jdf.drop(colName) + return DataFrame(jdf, self.sql_ctx) + def toPandas(self): """Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. From b94a93371cf219a88edee7677d22f1eaefc1ea5b Mon Sep 17 00:00:00 2001 From: Joshi Date: Mon, 11 May 2015 21:02:34 -0700 Subject: [PATCH 26/59] [SPARK-7435] [SPARKR] Make DataFrame.show() consistent with that of Scala and pySpark Author: Joshi Author: Rekha Joshi Closes #5989 from rekhajoshm/fix/SPARK-7435 and squashes the following commits: cfc9e02 [Joshi] Spark-7435[R]: updated patch for review comments 62becc1 [Joshi] SPARK-7435: Update to DataFrame e3677c9 [Rekha Joshi] Merge pull request #1 from apache/master --- R/pkg/R/DataFrame.R | 7 ++++--- R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 8a9d2dd45c588..2705817531019 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -150,7 +150,7 @@ setMethod("isLocal", callJMethod(x@sdf, "isLocal") }) -#' ShowDF +#' showDF #' #' Print the first numRows rows of a DataFrame #' @@ -170,7 +170,8 @@ setMethod("isLocal", setMethod("showDF", signature(x = "DataFrame"), function(x, numRows = 20) { - callJMethod(x@sdf, "showString", numToInt(numRows)) + s <- callJMethod(x@sdf, "showString", numToInt(numRows)) + cat(s) }) #' show @@ -187,7 +188,7 @@ setMethod("showDF", #' sqlCtx <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- jsonFile(sqlCtx, path) -#' show(df) +#' df #'} setMethod("show", "DataFrame", function(object) { diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index dbb535e245321..99c28830c6237 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -653,7 +653,8 @@ test_that("toJSON() returns an RDD of the correct values", { test_that("showDF()", { df <- jsonFile(sqlCtx, jsonPath) - expect_output(showDF(df), "+----+-------+\n| age| name|\n+----+-------+\n|null|Michael|\n| 30| Andy|\n| 19| Justin|\n+----+-------+\n") + s <- capture.output(showDF(df)) + expect_output(s , "+----+-------+\n| age| name|\n+----+-------+\n|null|Michael|\n| 30| Andy|\n| 19| Justin|\n+----+-------+\n") }) test_that("isLocal()", { From 16696759e9a292378cbfdf695a63d6d0cff0d79a Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 11 May 2015 22:06:56 -0700 Subject: [PATCH 27/59] [SQL] Rename Dialect -> ParserDialect. Author: Reynold Xin Closes #6071 from rxin/parserdialect and squashes the following commits: ca2eb31 [Reynold Xin] Rename Dialect -> ParserDialect. 
--- .../catalyst/{Dialect.scala => ParserDialect.scala} | 2 +- .../main/scala/org/apache/spark/sql/SQLContext.scala | 12 ++++++------ .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++-- .../org/apache/spark/sql/hive/HiveContext.scala | 4 ++-- .../spark/sql/hive/execution/SQLQuerySuite.scala | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/{Dialect.scala => ParserDialect.scala} (97%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala similarity index 97% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala index 977003493d471..05a92b06f9fd9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan * */ @DeveloperApi -abstract class Dialect { +abstract class ParserDialect { // this is the main function that will be implemented by sql parser. def parse(sqlText: String): LogicalPlan } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 28fc9d04436f7..648021806f3ee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.catalyst.optimizer.{DefaultOptimizer, Optimizer} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.catalyst.Dialect +import org.apache.spark.sql.catalyst.ParserDialect import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, expressions} import org.apache.spark.sql.execution.{Filter, _} import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} @@ -49,7 +49,7 @@ import org.apache.spark.{Partition, SparkContext} /** * Currently we support the default dialect named "sql", associated with the class - * [[DefaultDialect]] + * [[DefaultParserDialect]] * * And we can also provide custom SQL Dialect, for example in Spark SQL CLI: * {{{ @@ -74,7 +74,7 @@ import org.apache.spark.{Partition, SparkContext} *-- "hiveql" (for HiveContext) * }}} */ -private[spark] class DefaultDialect extends Dialect { +private[spark] class DefaultParserDialect extends ParserDialect { @transient protected val sqlParser = new catalyst.SqlParser @@ -176,10 +176,10 @@ class SQLContext(@transient val sparkContext: SparkContext) @transient protected[sql] val sqlParser = new SparkSQLParser(getSQLDialect().parse(_)) - protected[sql] def getSQLDialect(): Dialect = { + protected[sql] def getSQLDialect(): ParserDialect = { try { val clazz = Utils.classForName(dialectClassName) - clazz.newInstance().asInstanceOf[Dialect] + clazz.newInstance().asInstanceOf[ParserDialect] } catch { case NonFatal(e) => // Since we didn't find the available SQL Dialect, it will fail even for SET command: @@ -209,7 +209,7 @@ class SQLContext(@transient val sparkContext: SparkContext) protected[sql] val defaultSession = createSession() protected[sql] def dialectClassName = if (conf.dialect == "sql") 
{ - classOf[DefaultDialect].getCanonicalName + classOf[DefaultParserDialect].getCanonicalName } else { conf.dialect } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b44eb223c80c5..ec0e76cde6f7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.test.TestSQLContext.{udf => _, _} import org.apache.spark.sql.types._ /** A SQL Dialect for testing purpose, and it can not be nested type */ -class MyDialect extends DefaultDialect +class MyDialect extends DefaultParserDialect class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { // Make sure the tables are loaded. @@ -94,7 +94,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { newContext.sql("SELECT 1") } // test if the dialect set back to DefaultSQLDialect - assert(newContext.getSQLDialect().getClass === classOf[DefaultDialect]) + assert(newContext.getSQLDialect().getClass === classOf[DefaultParserDialect]) } test("SPARK-4625 support SORT BY in SimpleSQLParser & DSL") { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 3bab648e31d1f..61e8c154e8c3c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -22,7 +22,7 @@ import java.sql.Timestamp import java.util.{ArrayList => JArrayList} import org.apache.hadoop.hive.ql.parse.VariableSubstitution -import org.apache.spark.sql.catalyst.Dialect +import org.apache.spark.sql.catalyst.ParserDialect import scala.collection.JavaConversions._ import scala.language.implicitConversions @@ -54,7 +54,7 @@ import org.apache.spark.util.Utils /** * This is the HiveQL Dialect, this dialect is strongly bind with HiveContext */ -private[hive] class HiveQLDialect extends Dialect { +private[hive] class HiveQLDialect extends ParserDialect { override def parse(sqlText: String): LogicalPlan = { HiveQl.parseSql(sqlText) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 0d739dead4c73..a5744ccc68a47 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries import org.apache.spark.sql.catalyst.errors.DialectException -import org.apache.spark.sql.DefaultDialect +import org.apache.spark.sql.DefaultParserDialect import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SQLConf} import org.apache.spark.sql.hive.MetastoreRelation import org.apache.spark.sql.hive.test.TestHive @@ -53,7 +53,7 @@ case class WindowData( area: String, product: Int) /** A SQL Dialect for testing purpose, and it can not be nested type */ -class MyDialect extends DefaultDialect +class MyDialect extends DefaultParserDialect /** * A collection of hive query tests where we generate the answers ourselves instead of depending on @@ -247,7 +247,7 @@ class SQLQuerySuite extends QueryTest { // set the dialect back to the DefaultSQLDialect sql("SET spark.sql.dialect=sql") - assert(getSQLDialect().getClass === 
classOf[DefaultDialect]) + assert(getSQLDialect().getClass === classOf[DefaultParserDialect]) sql("SET spark.sql.dialect=hiveql") assert(getSQLDialect().getClass === classOf[HiveQLDialect]) From 640f63b959f936ac142ce429262ffc3db4536619 Mon Sep 17 00:00:00 2001 From: vidmantas zemleris Date: Mon, 11 May 2015 22:29:24 -0700 Subject: [PATCH 28/59] [SPARK-6994][SQL] Update docs for fetching Row fields by name add docs for https://issues.apache.org/jira/browse/SPARK-6994 Author: vidmantas zemleris Closes #6030 from vidma/docs/row-with-named-fields and squashes the following commits: 241b401 [vidmantas zemleris] [SPARK-6994][SQL] Update docs for fetching Row fields by name --- docs/sql-programming-guide.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 6b7b867ea6b5d..78b8e8ad515a0 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -367,11 +367,18 @@ val people = sc.textFile("examples/src/main/resources/people.txt").map(_.split(" people.registerTempTable("people") // SQL statements can be run by using the sql methods provided by sqlContext. -val teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") +val teenagers = sqlContext.sql("SELECT name, age FROM people WHERE age >= 13 AND age <= 19") // The results of SQL queries are DataFrames and support all the normal RDD operations. -// The columns of a row in the result can be accessed by ordinal. +// The columns of a row in the result can be accessed by field index: teenagers.map(t => "Name: " + t(0)).collect().foreach(println) + +// or by field name: +teenagers.map(t => "Name: " + t.getAs[String]("name")).collect().foreach(println) + +// row.getValuesMap[T] retrieves multiple columns at once into a Map[String, T] +teenagers.map(_.getValuesMap[Any](List("name", "age"))).collect().foreach(println) +// Map("name" -> "Justin", "age" -> 19) {% endhighlight %}
    @@ -538,7 +545,7 @@ peopleDataFrame.registerTempTable("people") val results = sqlContext.sql("SELECT name FROM people") // The results of SQL queries are DataFrames and support all the normal RDD operations. -// The columns of a row in the result can be accessed by ordinal. +// The columns of a row in the result can be accessed by field index or by field name. results.map(t => "Name: " + t(0)).collect().foreach(println) {% endhighlight %} From 8a4edecc483490da4fce406ac3b401860834ac1e Mon Sep 17 00:00:00 2001 From: zsxwing Date: Tue, 12 May 2015 01:34:33 -0700 Subject: [PATCH 29/59] [SPARK-7534] [CORE] [WEBUI] Fix the Stage table when a stage is missing Just improved the Stage table when a stage is missing. Before: ![screen shot 2015-05-11 at 10 11 51 am](https://cloud.githubusercontent.com/assets/1000778/7570842/2ba37380-f7c8-11e4-9b5f-cf1a6264b2a4.png) After: ![screen shot 2015-05-11 at 10 26 09 am](https://cloud.githubusercontent.com/assets/1000778/7570848/33703152-f7c8-11e4-81a8-d53dd72d7b8d.png) Author: zsxwing Closes #6061 from zsxwing/SPARK-7534 and squashes the following commits: 09fe862 [zsxwing] Leave it blank rather than '-' 6299197 [zsxwing] Fix the Stage table when a stage is missing --- .../org/apache/spark/ui/jobs/StageTable.scala | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala index a33243d4252bf..82ba561eefb16 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala @@ -121,10 +121,23 @@ private[ui] class StageTableBase(
    {stageDesc.getOrElse("")} {killLink} {nameLink} {details}
    } + protected def missingStageRow(stageId: Int): Seq[Node] = { + {stageId} ++ + {if (isFairScheduler) {-} else Seq.empty} ++ + No data available for this stage ++ // Description + ++ // Submitted + ++ // Duration + ++ // Tasks: Succeeded/Total + ++ // Input + ++ // Output + ++ // Shuffle Read + // Shuffle Write + } + protected def stageRow(s: StageInfo): Seq[Node] = { val stageDataOption = listener.stageIdToData.get((s.stageId, s.attemptId)) if (stageDataOption.isEmpty) { - return {s.stageId}No data available for this stage + return missingStageRow(s.stageId) } val stageData = stageDataOption.get From 984787526625b4ef8a1635faf7a5ac3cb0b758b7 Mon Sep 17 00:00:00 2001 From: linweizhong Date: Tue, 12 May 2015 01:36:27 -0700 Subject: [PATCH 30/59] [MINOR] [PYSPARK] Set PYTHONPATH to python/lib/pyspark.zip rather than python/pyspark As PR #5580 we have created pyspark.zip on building and set PYTHONPATH to python/lib/pyspark.zip, so to keep consistence update this. Author: linweizhong Closes #6047 from Sephiroth-Lin/pyspark_pythonpath and squashes the following commits: 8cc3d96 [linweizhong] Set PYTHONPATH to python/lib/pyspark.zip rather than python/pyspark as PR#5580 we have create pyspark.zip on build --- .../main/scala/org/apache/spark/api/python/PythonUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index acbaba6791850..9eff0a230cbe2 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -31,7 +31,7 @@ private[spark] object PythonUtils { def sparkPythonPath: String = { val pythonPath = new ArrayBuffer[String] for (sparkHome <- sys.env.get("SPARK_HOME")) { - pythonPath += Seq(sparkHome, "python").mkString(File.separator) + pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator) pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.8.2.1-src.zip").mkString(File.separator) } pythonPath ++= SparkContext.jarOfObject(this) From 82e890fb19d6fbaffa69856eecb4699f2f8a81eb Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Tue, 12 May 2015 01:39:21 -0700 Subject: [PATCH 31/59] [SPARK-7485] [BUILD] Remove pyspark files from assembly. The sbt part of the build is hacky; it basically tricks sbt into generating the zip by using a generator, but returns an empty list for the generated files so that nothing is actually added to the assembly. Author: Marcelo Vanzin Closes #6022 from vanzin/SPARK-7485 and squashes the following commits: 22c1e04 [Marcelo Vanzin] Remove unneeded code. 4893622 [Marcelo Vanzin] [SPARK-7485] [build] Remove pyspark files from assembly. 
--- core/pom.xml | 47 ---------------------------------------- mllib/pom.xml | 11 ---------- project/SparkBuild.scala | 44 +++---------------------------------- sql/core/pom.xml | 8 ------- streaming/pom.xml | 8 ------- 5 files changed, 3 insertions(+), 115 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index fc42f48973fe9..262a3320db106 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -381,35 +381,6 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - org.apache.maven.plugins - maven-antrun-plugin - - - generate-resources - - run - - - - - - - - - - - maven-clean-plugin - - - - ${basedir}/../python/build - - - true - - org.apache.maven.plugins maven-dependency-plugin @@ -438,24 +409,6 @@ - - - - src/main/resources - - - ../python - - pyspark/*.py - - - - ../python/build - - py4j/*.py - - - diff --git a/mllib/pom.xml b/mllib/pom.xml index a3c57ae26000b..0c07ca1a62fd3 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -141,16 +141,5 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - ../python - - pyspark/mllib/*.py - pyspark/mllib/stat/*.py - pyspark/ml/*.py - pyspark/ml/param/*.py - - - diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 186345af0e60e..1b87e4e98bd83 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -168,7 +168,7 @@ object SparkBuild extends PomBuild { /* Enable Assembly for all assembly projects */ assemblyProjects.foreach(enable(Assembly.settings)) - /* Package pyspark artifacts in the main assembly. */ + /* Package pyspark artifacts in a separate zip file for YARN. */ enable(PySparkAssembly.settings)(assembly) /* Enable unidoc only for the root spark project */ @@ -373,22 +373,15 @@ object PySparkAssembly { import java.util.zip.{ZipOutputStream, ZipEntry} lazy val settings = Seq( - unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" }, // Use a resource generator to copy all .py files from python/pyspark into a managed directory // to be included in the assembly. We can't just add "python/" to the assembly's resource dir // list since that will copy unneeded / unwanted files. 
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File => val src = new File(BuildCommons.sparkHome, "python/pyspark") - val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip") zipFile.delete() zipRecursive(src, zipFile) - - val dst = new File(outDir, "pyspark") - if (!dst.isDirectory()) { - require(dst.mkdirs()) - } - copy(src, dst) + Seq[File]() } ) @@ -416,42 +409,11 @@ object PySparkAssembly { output.write(buf, 0, n) } } + output.closeEntry() in.close() } } - private def copy(src: File, dst: File): Seq[File] = { - src.listFiles().flatMap { f => - val child = new File(dst, f.getName()) - if (f.isDirectory()) { - child.mkdir() - copy(f, child) - } else if (f.getName().endsWith(".py")) { - var in: Option[FileInputStream] = None - var out: Option[FileOutputStream] = None - try { - in = Some(new FileInputStream(f)) - out = Some(new FileOutputStream(child)) - - val bytes = new Array[Byte](1024) - var read = 0 - while (read >= 0) { - read = in.get.read(bytes) - if (read > 0) { - out.get.write(bytes, 0, read) - } - } - - Some(child) - } finally { - in.foreach(_.close()) - out.foreach(_.close()) - } - } else { - None - } - } - } } object Unidoc { diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 7d274a73e079f..ffe95bb49188f 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -103,13 +103,5 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - ../../python - - pyspark/sql/*.py - - - diff --git a/streaming/pom.xml b/streaming/pom.xml index 5ca55a4f680bb..5ab7f4472c38b 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -105,13 +105,5 @@ - - - ../python - - pyspark/streaming/*.py - - - From f3e8e60063ccf0d713d03e671a3231560475f90d Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 12 May 2015 01:40:55 -0700 Subject: [PATCH 32/59] [SPARK-7467] Dag visualization: treat checkpoint as an RDD operation Such that a checkpoint RDD does not go into random scopes on the UI, e.g. `take`. We've seen this in streaming. Author: Andrew Or Closes #6004 from andrewor14/dag-viz-checkpoint and squashes the following commits: 9217439 [Andrew Or] Fix checkpoints 4ae8806 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-checkpoint 19bc07b [Andrew Or] Treat checkpoint as an RDD operation --- .../main/scala/org/apache/spark/rdd/RDD.scala | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 7dad30ecbdd2f..02a94baf372d9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1523,13 +1523,15 @@ abstract class RDD[T: ClassTag]( * has completed (therefore the RDD has been materialized and potentially stored in memory). * doCheckpoint() is called recursively on the parent RDDs. 
*/ - private[spark] def doCheckpoint() { - if (!doCheckpointCalled) { - doCheckpointCalled = true - if (checkpointData.isDefined) { - checkpointData.get.doCheckpoint() - } else { - dependencies.foreach(_.rdd.doCheckpoint()) + private[spark] def doCheckpoint(): Unit = { + RDDOperationScope.withScope(sc, "checkpoint", false, true) { + if (!doCheckpointCalled) { + doCheckpointCalled = true + if (checkpointData.isDefined) { + checkpointData.get.doCheckpoint() + } else { + dependencies.foreach(_.rdd.doCheckpoint()) + } } } } From ec6f2a9774167014566fb9608ee4394d2ce5fd6a Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Tue, 12 May 2015 08:48:24 -0700 Subject: [PATCH 33/59] [SPARK-7532] [STREAMING] StreamingContext.start() made to logWarning and not throw exception Author: Tathagata Das Closes #6060 from tdas/SPARK-7532 and squashes the following commits: 6fe2e83 [Tathagata Das] Update docs 7dadfc3 [Tathagata Das] Fixed bug again 99c7678 [Tathagata Das] Added logInfo 65aec20 [Tathagata Das] Fix bug 5bf031b [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7532 1a9a818 [Tathagata Das] Fix scaladoc c584313 [Tathagata Das] StreamingContext.start() made to logWarning and not throw exception --- .../spark/streaming/StreamingContext.scala | 27 +++++++++---------- .../streaming/StreamingContextSuite.scala | 4 +-- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 2c5834defa8c8..8461e901208ff 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -528,28 +528,27 @@ class StreamingContext private[streaming] ( /** * Start the execution of the streams. * - * @throws SparkException if the context has already been started or stopped. + * @throws SparkException if the StreamingContext is already stopped. 
*/ def start(): Unit = synchronized { - import StreamingContext._ state match { case INITIALIZED => - // good to start + validate() + startSite.set(DStream.getCreationSite()) + sparkContext.setCallSite(startSite.get) + StreamingContext.ACTIVATION_LOCK.synchronized { + StreamingContext.assertNoOtherContextIsActive() + scheduler.start() + uiTab.foreach(_.attach()) + state = StreamingContextState.ACTIVE + StreamingContext.setActiveContext(this) + } + logInfo("StreamingContext started") case ACTIVE => - throw new SparkException("StreamingContext has already been started") + logWarning("StreamingContext has already been started") case STOPPED => throw new SparkException("StreamingContext has already been stopped") } - validate() - startSite.set(DStream.getCreationSite()) - sparkContext.setCallSite(startSite.get) - ACTIVATION_LOCK.synchronized { - assertNoOtherContextIsActive() - scheduler.start() - uiTab.foreach(_.attach()) - state = StreamingContextState.ACTIVE - setActiveContext(this) - } } /** diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index b8247db7e8950..47299513de095 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -136,9 +136,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w addInputStream(ssc).register() ssc.start() assert(ssc.getState() === StreamingContextState.ACTIVE) - intercept[SparkException] { - ssc.start() - } + ssc.start() assert(ssc.getState() === StreamingContextState.ACTIVE) } From d86ce845840a92b4dde7975082738ed94ab8c570 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 12 May 2015 10:23:41 -0700 Subject: [PATCH 34/59] [SPARK-6876] [PySpark] [SQL] add DataFrame na.replace in pyspark Author: Daoyuan Wang Closes #6003 from adrian-wang/pynareplace and squashes the following commits: 672efba [Daoyuan Wang] remove py2.7 feature 4a148f7 [Daoyuan Wang] to_replace support dict, value support single value, and add full tests 9e232e7 [Daoyuan Wang] rename scala map af0268a [Daoyuan Wang] remove na 63ac579 [Daoyuan Wang] add na.replace in pyspark --- .../apache/spark/api/python/PythonUtils.scala | 7 ++ python/pyspark/sql/dataframe.py | 85 +++++++++++++++++++ python/pyspark/sql/tests.py | 48 +++++++++++ 3 files changed, 140 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 9eff0a230cbe2..efb6b93cfc35d 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -53,4 +53,11 @@ private[spark] object PythonUtils { def toSeq[T](cols: JList[T]): Seq[T] = { cols.toList.toSeq } + + /** + * Convert java map of K, V into Map of K, V (for calling API with varargs) + */ + def toScalaMap[K, V](jm: java.util.Map[K, V]): Map[K, V] = { + jm.toMap + } } diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 72180f6d05fbc..078acfdf7e2df 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -578,6 +578,10 @@ def _jseq(self, cols, converter=None): """Return a JVM Seq of Columns from a list of Column or names""" return _to_seq(self.sql_ctx._sc, cols, converter) + def _jmap(self, jm): + """Return a JVM Scala Map from a dict""" + return 
_to_scala_map(self.sql_ctx._sc, jm) + def _jcols(self, *cols): """Return a JVM Seq of Columns from a list of Column or column names @@ -924,6 +928,80 @@ def fillna(self, value, subset=None): return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sql_ctx) + def replace(self, to_replace, value, subset=None): + """Returns a new :class:`DataFrame` replacing a value with another value. + + :param to_replace: int, long, float, string, or list. + Value to be replaced. + If the value is a dict, then `value` is ignored and `to_replace` must be a + mapping from column name (string) to replacement value. The value to be + replaced must be an int, long, float, or string. + :param value: int, long, float, string, or list. + Value to use to replace holes. + The replacement value must be an int, long, float, or string. If `value` is a + list or tuple, `value` should be of the same length with `to_replace`. + :param subset: optional list of column names to consider. + Columns specified in subset that do not have matching data type are ignored. + For example, if `value` is a string, and subset contains a non-string column, + then the non-string column is simply ignored. + >>> df4.replace(10, 20).show() + +----+------+-----+ + | age|height| name| + +----+------+-----+ + | 20| 80|Alice| + | 5| null| Bob| + |null| null| Tom| + |null| null| null| + +----+------+-----+ + + >>> df4.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show() + +----+------+----+ + | age|height|name| + +----+------+----+ + | 10| 80| A| + | 5| null| B| + |null| null| Tom| + |null| null|null| + +----+------+----+ + """ + if not isinstance(to_replace, (float, int, long, basestring, list, tuple, dict)): + raise ValueError( + "to_replace should be a float, int, long, string, list, tuple, or dict") + + if not isinstance(value, (float, int, long, basestring, list, tuple)): + raise ValueError("value should be a float, int, long, string, list, or tuple") + + rep_dict = dict() + + if isinstance(to_replace, (float, int, long, basestring)): + to_replace = [to_replace] + + if isinstance(to_replace, tuple): + to_replace = list(to_replace) + + if isinstance(value, tuple): + value = list(value) + + if isinstance(to_replace, list) and isinstance(value, list): + if len(to_replace) != len(value): + raise ValueError("to_replace and value lists should be of the same length") + rep_dict = dict(zip(to_replace, value)) + elif isinstance(to_replace, list) and isinstance(value, (float, int, long, basestring)): + rep_dict = dict([(tr, value) for tr in to_replace]) + elif isinstance(to_replace, dict): + rep_dict = to_replace + + if subset is None: + return DataFrame(self._jdf.na().replace('*', rep_dict), self.sql_ctx) + elif isinstance(subset, basestring): + subset = [subset] + + if not isinstance(subset, (list, tuple)): + raise ValueError("subset should be a list or tuple of column names") + + return DataFrame( + self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), self.sql_ctx) + def corr(self, col1, col2, method=None): """ Calculates the correlation of two columns of a DataFrame as a double value. Currently only @@ -1226,6 +1304,13 @@ def _to_seq(sc, cols, converter=None): return sc._jvm.PythonUtils.toSeq(cols) +def _to_scala_map(sc, jm): + """ + Convert a dict into a JVM Map. 
+ """ + return sc._jvm.PythonUtils.toScalaMap(jm) + + def _unary_op(name, doc="unary operator"): """ Create a method for given unary operator """ def _(self): diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 7e63f4d6461f6..1922d03af61da 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -665,6 +665,54 @@ def test_bitwise_operations(self): result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict() self.assertEqual(~75, result['~b']) + def test_replace(self): + schema = StructType([ + StructField("name", StringType(), True), + StructField("age", IntegerType(), True), + StructField("height", DoubleType(), True)]) + + # replace with int + row = self.sqlCtx.createDataFrame([(u'Alice', 10, 10.0)], schema).replace(10, 20).first() + self.assertEqual(row.age, 20) + self.assertEqual(row.height, 20.0) + + # replace with double + row = self.sqlCtx.createDataFrame( + [(u'Alice', 80, 80.0)], schema).replace(80.0, 82.1).first() + self.assertEqual(row.age, 82) + self.assertEqual(row.height, 82.1) + + # replace with string + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 80.1)], schema).replace(u'Alice', u'Ann').first() + self.assertEqual(row.name, u"Ann") + self.assertEqual(row.age, 10) + + # replace with subset specified by a string of a column name w/ actual change + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 80.1)], schema).replace(10, 20, subset='age').first() + self.assertEqual(row.age, 20) + + # replace with subset specified by a string of a column name w/o actual change + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 80.1)], schema).replace(10, 20, subset='height').first() + self.assertEqual(row.age, 10) + + # replace with subset specified with one column replaced, another column not in subset + # stays unchanged. + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 10.0)], schema).replace(10, 20, subset=['name', 'age']).first() + self.assertEqual(row.name, u'Alice') + self.assertEqual(row.age, 20) + self.assertEqual(row.height, 10.0) + + # replace with subset specified but no column will be replaced + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, None)], schema).replace(10, 20, subset=['name', 'height']).first() + self.assertEqual(row.name, u'Alice') + self.assertEqual(row.age, 10) + self.assertEqual(row.height, None) + class HiveContextSQLTests(ReusedPySparkTestCase): From 831504cf6bde6b1131005d5552e56a842725c84c Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 12 May 2015 10:28:40 -0700 Subject: [PATCH 35/59] [DataFrame][minor] cleanup unapply methods in DataTypes Author: Wenchen Fan Closes #6079 from cloud-fan/unapply and squashes the following commits: 40da442 [Wenchen Fan] one more 7d90a05 [Wenchen Fan] cleanup unapply in DataTypes --- .../org/apache/spark/sql/types/DataType.scala | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 0992a7c311ee2..a0b261649f66f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -47,10 +47,7 @@ abstract class DataType { * ... 
* }}} */ - private[sql] def unapply(a: Expression): Boolean = a match { - case e: Expression if e.dataType == this => true - case _ => false - } + private[sql] def unapply(e: Expression): Boolean = e.dataType == this /** * The default size of a value of this data type, used internally for size estimation. @@ -137,10 +134,7 @@ private[sql] object IntegralType { * ... * }}} */ - def unapply(a: Expression): Boolean = a match { - case e: Expression if e.dataType.isInstanceOf[IntegralType] => true - case _ => false - } + def unapply(e: Expression): Boolean = e.dataType.isInstanceOf[IntegralType] } @@ -157,10 +151,7 @@ private[sql] object FractionalType { * ... * }}} */ - def unapply(a: Expression): Boolean = a match { - case e: Expression if e.dataType.isInstanceOf[FractionalType] => true - case _ => false - } + def unapply(e: Expression): Boolean = e.dataType.isInstanceOf[FractionalType] } From 0595b6de8f1da04baceda082553c2aa1aa2cb006 Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Wed, 13 May 2015 01:32:28 +0800 Subject: [PATCH 36/59] [SPARK-3928] [SPARK-5182] [SQL] Partitioning support for the data sources API This PR adds partitioning support for the external data sources API. It aims to simplify development of file system based data sources, and provide first class partitioning support for both read path and write path. Existing data sources like JSON and Parquet can be simplified with this work. ## New features provided 1. Hive compatible partition discovery This actually generalizes the partition discovery strategy used in Parquet data source in Spark 1.3.0. 1. Generalized partition pruning optimization Now partition pruning is handled during physical planning phase. Specific data sources don't need to worry about this harness anymore. (This also implies that we can remove `CatalystScan` after migrating the Parquet data source, since now we don't need to pass Catalyst expressions to data source implementations.) 1. Insertion with dynamic partitions When inserting data to a `FSBasedRelation`, data can be partitioned dynamically by specified partition columns. ## New structures provided ### Developer API 1. `FSBasedRelation` Base abstract class for file system based data sources. 1. `OutputWriter` Base abstract class for output row writers, responsible for writing a single row object. 1. `FSBasedRelationProvider` A new relation provider for `FSBasedRelation` subclasses. Note that data sources extending `FSBasedRelation` don't need to extend `RelationProvider` and `SchemaRelationProvider`. ### User API New overloaded versions of 1. `DataFrame.save()` 1. `DataFrame.saveAsTable()` 1. `SQLContext.load()` are provided to allow users to save/load DataFrames with user defined dynamic partition columns. ### Spark SQL query planning 1. `InsertIntoFSBasedRelation` Used to implement write path for `FSBasedRelation`s. 1. New rules for `FSBasedRelation` in `DataSourceStrategy` These are added to hook `FSBasedRelation` into physical query plan in read path, and perform partition pruning. ## TODO - [ ] Use scratch directories when overwriting a table with data selected from itself. Currently, this is not supported, because the table been overwritten is always deleted before writing any data to it. - [ ] When inserting with dynamic partition columns, use external sorter to group the data first. This ensures that we only need to open a single `OutputWriter` at a time. For data sources like Parquet, `OutputWriter`s can be quite memory consuming. 
One issue is that, this approach breaks the row distribution in the original DataFrame. However, we did't promise to preserve data distribution when writing a DataFrame. - [x] More tests. Specifically, test cases for - [x] Self-join - [x] Loading partitioned relations with a subset of partition columns stored in data files. - [x] `SQLContext.load()` with user defined dynamic partition columns. ## Parquet data source migration Parquet data source migration is covered in PR https://github.com/liancheng/spark/pull/6, which is against this PR branch and for preview only. A formal PR need to be made after this one is merged. Author: Cheng Lian Closes #5526 from liancheng/partitioning-support and squashes the following commits: 5351a1b [Cheng Lian] Fixes compilation error introduced while rebasing 1f9b1a5 [Cheng Lian] Tweaks data schema passed to FSBasedRelations 43ba50e [Cheng Lian] Avoids serializing generated projection code edf49e7 [Cheng Lian] Removed commented stale code block 348a922 [Cheng Lian] Adds projection in FSBasedRelation.buildScan(requiredColumns, inputPaths) ad4d4de [Cheng Lian] Enables HDFS style globbing 8d12e69 [Cheng Lian] Fixes compilation error c71ac6c [Cheng Lian] Addresses comments from @marmbrus 7552168 [Cheng Lian] Fixes typo in MimaExclude.scala 0349e09 [Cheng Lian] Fixes compilation error introduced while rebasing 52b0c9b [Cheng Lian] Adjusts project/MimaExclude.scala c466de6 [Cheng Lian] Addresses comments bc3f9b4 [Cheng Lian] Uses projection to separate partition columns and data columns while inserting rows 795920a [Cheng Lian] Fixes compilation error after rebasing 0b8cd70 [Cheng Lian] Adds Scala/Catalyst row conversion when writing non-partitioned tables fa543f3 [Cheng Lian] Addresses comments 5849dd0 [Cheng Lian] Fixes doc typos. Fixes partition discovery refresh. 51be443 [Cheng Lian] Replaces FSBasedRelation.outputCommitterClass with FSBasedRelation.prepareForWrite c4ed4fe [Cheng Lian] Bug fixes and a new test suite a29e663 [Cheng Lian] Bug fix: should only pass actuall data files to FSBaseRelation.buildScan 5f423d3 [Cheng Lian] Bug fixes. 
Lets data source to customize OutputCommitter rather than OutputFormat 54c3d7b [Cheng Lian] Enforces that FileOutputFormat must be used be0c268 [Cheng Lian] Uses TaskAttempContext rather than Configuration in OutputWriter.init 0bc6ad1 [Cheng Lian] Resorts to new Hadoop API, and now FSBasedRelation can customize output format class f320766 [Cheng Lian] Adds prepareForWrite() hook, refactored writer containers 422ff4a [Cheng Lian] Fixes style issue ce52353 [Cheng Lian] Adds new SQLContext.load() overload with user defined dynamic partition columns 8d2ff71 [Cheng Lian] Merges partition columns when reading partitioned relations ca1805b [Cheng Lian] Removes duplicated partition discovery code in new Parquet f18dec2 [Cheng Lian] More strict schema checking b746ab5 [Cheng Lian] More tests 9b487bf [Cheng Lian] Fixes compilation errors introduced while rebasing ea6c8dd [Cheng Lian] Removes remote debugging stuff 327bb1d [Cheng Lian] Implements partitioning support for data sources API 3c5073a [Cheng Lian] Fixes SaveModes used in test cases fb5a607 [Cheng Lian] Fixes compilation error 9d17607 [Cheng Lian] Adds the contract that OutputWriter should have zero-arg constructor 5de194a [Cheng Lian] Forgot Apache licence header 95d0b4d [Cheng Lian] Renames PartitionedSchemaRelationProvider to FSBasedRelationProvider 770b5ba [Cheng Lian] Adds tests for FSBasedRelation 3ba9bbf [Cheng Lian] Adds DataFrame.saveAsTable() overrides which support partitioning 1b8231f [Cheng Lian] Renames FSBasedPrunedFilteredScan to FSBasedRelation aa8ba9a [Cheng Lian] Javadoc fix 012ed2d [Cheng Lian] Adds PartitioningOptions 7dd8dd5 [Cheng Lian] Adds new interfaces and stub methods for data sources API partitioning support --- .../apache/spark/deploy/SparkHadoopUtil.scala | 52 +- project/MimaExcludes.scala | 14 +- .../org/apache/spark/sql/DataFrame.scala | 107 +++- .../scala/org/apache/spark/sql/SQLConf.scala | 6 + .../org/apache/spark/sql/SQLContext.scala | 36 +- .../spark/sql/execution/SparkStrategies.scala | 10 +- .../apache/spark/sql/parquet/newParquet.scala | 182 +----- .../sql/sources/DataSourceStrategy.scala | 191 ++++++- .../spark/sql/sources/PartitioningUtils.scala | 207 +++++++ .../apache/spark/sql/sources/commands.scala | 406 +++++++++++++- .../org/apache/spark/sql/sources/ddl.scala | 90 ++- .../apache/spark/sql/sources/interfaces.scala | 283 +++++++++- .../org/apache/spark/sql/sources/rules.scala | 8 +- .../ParquetPartitionDiscoverySuite.scala | 3 +- .../sources/CreateTableAsSelectSuite.scala | 4 +- .../spark/sql/hive/HiveMetastoreCatalog.scala | 13 +- .../spark/sql/hive/HiveStrategies.scala | 6 +- .../spark/sql/hive/execution/commands.scala | 27 +- .../spark/sql/hive/hiveWriterContainers.scala | 2 +- .../sql/sources/FSBasedRelationSuite.scala | 525 ++++++++++++++++++ .../sql/sources/SimpleTextRelation.scala | 125 +++++ 21 files changed, 2042 insertions(+), 255 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index b563034457a91..7fa75ac8c2b54 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -22,22 +22,22 @@ import 
java.lang.reflect.Method import java.security.PrivilegedExceptionAction import java.util.{Arrays, Comparator} +import scala.collection.JavaConversions._ +import scala.concurrent.duration._ +import scala.language.postfixOps + import com.google.common.primitives.Longs import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter} import org.apache.hadoop.fs.FileSystem.Statistics +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter} import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce.JobContext import org.apache.hadoop.security.{Credentials, UserGroupInformation} -import org.apache.spark.{Logging, SparkConf, SparkException} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils - -import scala.collection.JavaConversions._ -import scala.concurrent.duration._ -import scala.language.postfixOps +import org.apache.spark.{Logging, SparkConf, SparkException} /** * :: DeveloperApi :: @@ -199,13 +199,43 @@ class SparkHadoopUtil extends Logging { * that file. */ def listLeafStatuses(fs: FileSystem, basePath: Path): Seq[FileStatus] = { - def recurse(path: Path): Array[FileStatus] = { - val (directories, leaves) = fs.listStatus(path).partition(_.isDir) - leaves ++ directories.flatMap(f => listLeafStatuses(fs, f.getPath)) + listLeafStatuses(fs, fs.getFileStatus(basePath)) + } + + /** + * Get [[FileStatus]] objects for all leaf children (files) under the given base path. If the + * given path points to a file, return a single-element collection containing [[FileStatus]] of + * that file. + */ + def listLeafStatuses(fs: FileSystem, baseStatus: FileStatus): Seq[FileStatus] = { + def recurse(status: FileStatus): Seq[FileStatus] = { + val (directories, leaves) = fs.listStatus(status.getPath).partition(_.isDir) + leaves ++ directories.flatMap(f => listLeafStatuses(fs, f)) } - val baseStatus = fs.getFileStatus(basePath) - if (baseStatus.isDir) recurse(basePath) else Array(baseStatus) + if (baseStatus.isDir) recurse(baseStatus) else Seq(baseStatus) + } + + def listLeafDirStatuses(fs: FileSystem, basePath: Path): Seq[FileStatus] = { + listLeafDirStatuses(fs, fs.getFileStatus(basePath)) + } + + def listLeafDirStatuses(fs: FileSystem, baseStatus: FileStatus): Seq[FileStatus] = { + def recurse(status: FileStatus): Seq[FileStatus] = { + val (directories, files) = fs.listStatus(status.getPath).partition(_.isDir) + val leaves = if (directories.isEmpty) Seq(status) else Seq.empty[FileStatus] + leaves ++ directories.flatMap(dir => listLeafDirStatuses(fs, dir)) + } + + assert(baseStatus.isDir) + recurse(baseStatus) + } + + def globPath(pattern: Path): Seq[Path] = { + val fs = pattern.getFileSystem(conf) + Option(fs.globStatus(pattern)).map { statuses => + statuses.map(_.getPath.makeQualified(fs.getUri, fs.getWorkingDirectory)).toSeq + }.getOrElse(Seq.empty[Path]) } /** diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index ad3d8426bdedd..a47e29e2ef365 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -94,11 +94,23 @@ object MimaExcludes { // This `protected[sql]` method was removed in 1.3.1 ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.sql.SQLContext.checkAnalysis"), - // This `private[sql]` class was removed in 1.4.0: + // These `private[sql]` class were removed in 1.4.0: ProblemFilters.exclude[MissingClassProblem]( 
"org.apache.spark.sql.execution.AddExchange"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.execution.AddExchange$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.PartitionSpec"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.PartitionSpec$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.Partition"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.Partition$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues$"), // These test support classes were moved out of src/main and into src/test: ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.ParquetTestData"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 265a61592b943..f3107f7b51ad8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -27,23 +27,23 @@ import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal import com.fasterxml.jackson.core.JsonFactory - import org.apache.commons.lang3.StringUtils + import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.python.SerDeUtil import org.apache.spark.rdd.RDD -import org.apache.spark.storage.StorageLevel -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, SqlParser} -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, ResolvedStar} +import org.apache.spark.sql.catalyst.analysis.{ResolvedStar, UnresolvedAttribute, UnresolvedRelation} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.{JoinType, Inner} -import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.logical.{Filter, _} +import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} +import org.apache.spark.sql.catalyst.{expressions, CatalystTypeConverters, ScalaReflection, SqlParser} import org.apache.spark.sql.execution.{EvaluatePython, ExplainCommand, LogicalRDD} import org.apache.spark.sql.jdbc.JDBCWriteDetails import org.apache.spark.sql.json.JacksonGenerator +import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, ResolvedDataSource} import org.apache.spark.sql.types._ -import org.apache.spark.sql.sources.{ResolvedDataSource, CreateTableUsingAsSelect} +import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils @@ -400,7 +400,9 @@ class DataFrame private[sql]( joined.left, joined.right, joinType = Inner, - Some(EqualTo(joined.left.resolve(usingColumn), joined.right.resolve(usingColumn)))) + Some(expressions.EqualTo( + joined.left.resolve(usingColumn), + joined.right.resolve(usingColumn)))) ) } @@ -465,8 +467,8 @@ class DataFrame private[sql]( // By the time we get here, since we have already run analysis, all attributes should've been // resolved and become AttributeReference. 
val cond = plan.condition.map { _.transform { - case EqualTo(a: AttributeReference, b: AttributeReference) if a.sameRef(b) => - EqualTo(plan.left.resolve(a.name), plan.right.resolve(b.name)) + case expressions.EqualTo(a: AttributeReference, b: AttributeReference) if a.sameRef(b) => + expressions.EqualTo(plan.left.resolve(a.name), plan.right.resolve(b.name)) }} plan.copy(condition = cond) } @@ -1324,6 +1326,28 @@ class DataFrame private[sql]( saveAsTable(tableName, source, mode, options.toMap) } + /** + * :: Experimental :: + * Creates a table at the given path from the the contents of this DataFrame + * based on a given data source, [[SaveMode]] specified by mode, a set of options, and a list of + * partition columns. + * + * Note that this currently only works with DataFrames that are created from a HiveContext as + * there is no notion of a persisted catalog in a standard SQL context. Instead you can write + * an RDD out to a parquet file, and then register that file as a table. This "table" can then + * be the target of an `insertInto`. + * @group output + */ + @Experimental + def saveAsTable( + tableName: String, + source: String, + mode: SaveMode, + options: java.util.Map[String, String], + partitionColumns: java.util.List[String]): Unit = { + saveAsTable(tableName, source, mode, options.toMap, partitionColumns) + } + /** * :: Experimental :: * (Scala-specific) @@ -1350,6 +1374,7 @@ class DataFrame private[sql]( tableName, source, temporary = false, + Array.empty[String], mode, options, logicalPlan) @@ -1357,6 +1382,36 @@ class DataFrame private[sql]( sqlContext.executePlan(cmd).toRdd } + /** + * :: Experimental :: + * Creates a table at the given path from the the contents of this DataFrame + * based on a given data source, [[SaveMode]] specified by mode, a set of options, and a list of + * partition columns. + * + * Note that this currently only works with DataFrames that are created from a HiveContext as + * there is no notion of a persisted catalog in a standard SQL context. Instead you can write + * an RDD out to a parquet file, and then register that file as a table. This "table" can then + * be the target of an `insertInto`. + * @group output + */ + @Experimental + def saveAsTable( + tableName: String, + source: String, + mode: SaveMode, + options: Map[String, String], + partitionColumns: Seq[String]): Unit = { + sqlContext.executePlan( + CreateTableUsingAsSelect( + tableName, + source, + temporary = false, + partitionColumns.toArray, + mode, + options, + logicalPlan)).toRdd + } + /** * :: Experimental :: * Saves the contents of this DataFrame to the given path, @@ -1417,6 +1472,21 @@ class DataFrame private[sql]( save(source, mode, options.toMap) } + /** + * :: Experimental :: + * Saves the contents of this DataFrame to the given path based on the given data source, + * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. 
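As a usage sketch, editorial and not part of the patch: the partition-aware saveAsTable overload added here, together with the matching save(...) overload added just below, could be invoked roughly as follows. The data source name is the built-in Parquet source; the path, table name, and partition column names are illustrative only, and `df` is assumed to be an existing DataFrame with `year` and `month` columns.

import org.apache.spark.sql.SaveMode

// Write `df` to a path, dynamically partitioned by year and month.
df.save(
  "org.apache.spark.sql.parquet",
  SaveMode.Append,
  Map("path" -> "/tmp/events"),        // illustrative output path
  Seq("year", "month"))                // dynamic partition columns

// Or persist it as a catalog table with the same partitioning (requires a HiveContext).
df.saveAsTable(
  "events",                            // illustrative table name
  "org.apache.spark.sql.parquet",
  SaveMode.ErrorIfExists,
  Map.empty[String, String],
  Seq("year", "month"))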
+ * @group output + */ + @Experimental + def save( + source: String, + mode: SaveMode, + options: java.util.Map[String, String], + partitionColumns: java.util.List[String]): Unit = { + save(source, mode, options.toMap, partitionColumns) + } + /** * :: Experimental :: * (Scala-specific) @@ -1429,7 +1499,22 @@ class DataFrame private[sql]( source: String, mode: SaveMode, options: Map[String, String]): Unit = { - ResolvedDataSource(sqlContext, source, mode, options, this) + ResolvedDataSource(sqlContext, source, Array.empty[String], mode, options, this) + } + + /** + * :: Experimental :: + * Saves the contents of this DataFrame to the given path based on the given data source, + * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. + * @group output + */ + @Experimental + def save( + source: String, + mode: SaveMode, + options: Map[String, String], + partitionColumns: Seq[String]): Unit = { + ResolvedDataSource(sqlContext, source, partitionColumns.toArray, mode, options, this) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index dcac97beafb04..f07bb196c11ec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -66,6 +66,9 @@ private[spark] object SQLConf { // to its length exceeds the threshold. val SCHEMA_STRING_LENGTH_THRESHOLD = "spark.sql.sources.schemaStringLengthThreshold" + // Whether to perform partition discovery when loading external data sources. Default to true. + val PARTITION_DISCOVERY_ENABLED = "spark.sql.sources.partitionDiscovery.enabled" + // Whether to perform eager analysis when constructing a dataframe. // Set to false when debugging requires the ability to look at invalid query plans. val DATAFRAME_EAGER_ANALYSIS = "spark.sql.eagerAnalysis" @@ -235,6 +238,9 @@ private[sql] class SQLConf extends Serializable with CatalystConf { private[spark] def defaultDataSourceName: String = getConf(DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.parquet") + private[spark] def partitionDiscoveryEnabled() = + getConf(SQLConf.PARTITION_DISCOVERY_ENABLED, "true").toBoolean + // Do not use a value larger than 4000 as the default value of this property. // See the comments of SCHEMA_STRING_LENGTH_THRESHOLD above for more information. private[spark] def schemaStringLengthThreshold: Int = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 648021806f3ee..afee09adaae20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -762,7 +762,7 @@ class SQLContext(@transient val sparkContext: SparkContext) */ @Experimental def load(source: String, options: Map[String, String]): DataFrame = { - val resolved = ResolvedDataSource(this, None, source, options) + val resolved = ResolvedDataSource(this, None, Array.empty[String], source, options) DataFrame(this, LogicalRelation(resolved.relation)) } @@ -781,6 +781,37 @@ class SQLContext(@transient val sparkContext: SparkContext) load(source, schema, options.toMap) } + /** + * :: Experimental :: + * (Java-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. 
+ * + * @group genericdata + */ + @Experimental + def load( + source: String, + schema: StructType, + partitionColumns: Array[String], + options: java.util.Map[String, String]): DataFrame = { + load(source, schema, partitionColumns, options.toMap) + } + + /** + * :: Experimental :: + * (Scala-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. + * @group genericdata + */ + @Experimental + def load( + source: String, + schema: StructType, + options: Map[String, String]): DataFrame = { + val resolved = ResolvedDataSource(this, Some(schema), Array.empty[String], source, options) + DataFrame(this, LogicalRelation(resolved.relation)) + } + /** * :: Experimental :: * (Scala-specific) Returns the dataset specified by the given data source and @@ -791,8 +822,9 @@ class SQLContext(@transient val sparkContext: SparkContext) def load( source: String, schema: StructType, + partitionColumns: Array[String], options: Map[String, String]): DataFrame = { - val resolved = ResolvedDataSource(this, Some(schema), source, options) + val resolved = ResolvedDataSource(this, Some(schema), partitionColumns, source, options) DataFrame(this, LogicalRelation(resolved.relation)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 56a4689eb58f0..af0029cb84f9a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -343,9 +343,13 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case c: CreateTableUsing if c.temporary && c.allowExisting => sys.error("allowExisting should be set to false when creating a temporary table.") - case CreateTableUsingAsSelect(tableName, provider, true, mode, opts, query) => - val cmd = - CreateTempTableUsingAsSelect(tableName, provider, mode, opts, query) + case CreateTableUsingAsSelect(tableName, provider, true, partitionsCols, mode, opts, query) + if partitionsCols.nonEmpty => + sys.error("Cannot create temporary partitioned table.") + + case CreateTableUsingAsSelect(tableName, provider, true, _, mode, opts, query) => + val cmd = CreateTempTableUsingAsSelect( + tableName, provider, Array.empty[String], mode, opts, query) ExecutedCommand(cmd) :: Nil case c: CreateTableUsingAsSelect if !c.temporary => sys.error("Tables created with SQLContext must be TEMPORARY. Use a HiveContext instead.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 85e60733bc57a..ee4b1c72a2148 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -136,10 +136,6 @@ private[sql] class DefaultSource } } -private[sql] case class Partition(values: Row, path: String) - -private[sql] case class PartitionSpec(partitionColumns: StructType, partitions: Seq[Partition]) - /** * An alternative to [[ParquetRelation]] that plugs in using the data sources API. This class is * intended as a full replacement of the Parquet support in Spark SQL. The old implementation will @@ -307,7 +303,7 @@ private[sql] case class ParquetRelation2( if (partitionDirs.nonEmpty) { // Parses names and values of partition columns, and infer their data types. 
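On the read side, a hedged sketch of the new SQLContext.load() overload with explicit partition columns; the directory layout, schema, and column names are illustrative and not taken from the patch. With spark.sql.sources.partitionDiscovery.enabled left at its default of true, the same columns can instead be inferred from the directory names, as the Parquet code below does.

import org.apache.spark.sql.types._

// Assumed layout on disk:
//   /data/events/year=2015/month=05/part-00000.parquet
val schema = StructType(Seq(
  StructField("id", LongType),
  StructField("clicks", IntegerType),
  StructField("year", IntegerType),
  StructField("month", IntegerType)))

val events = sqlContext.load(
  "org.apache.spark.sql.parquet",
  schema,
  Array("year", "month"),              // explicit partition columns
  Map("path" -> "/data/events"))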
- ParquetRelation2.parsePartitions(partitionDirs, defaultPartitionName) + PartitioningUtils.parsePartitions(partitionDirs, defaultPartitionName) } else { // No partition directories found, makes an empty specification PartitionSpec(StructType(Seq.empty[StructField]), Seq.empty[Partition]) @@ -805,7 +801,7 @@ private[sql] object ParquetRelation2 extends Logging { val ordinalMap = metastoreSchema.zipWithIndex.map { case (field, index) => field.name.toLowerCase -> index }.toMap - val reorderedParquetSchema = mergedParquetSchema.sortBy(f => + val reorderedParquetSchema = mergedParquetSchema.sortBy(f => ordinalMap.getOrElse(f.name.toLowerCase, metastoreSchema.size + 1)) StructType(metastoreSchema.zip(reorderedParquetSchema).map { @@ -841,178 +837,4 @@ private[sql] object ParquetRelation2 extends Logging { .filter(_.nullable) StructType(parquetSchema ++ missingFields) } - - - // TODO Data source implementations shouldn't touch Catalyst types (`Literal`). - // However, we are already using Catalyst expressions for partition pruning and predicate - // push-down here... - private[parquet] case class PartitionValues(columnNames: Seq[String], literals: Seq[Literal]) { - require(columnNames.size == literals.size) - } - - /** - * Given a group of qualified paths, tries to parse them and returns a partition specification. - * For example, given: - * {{{ - * hdfs://:/path/to/partition/a=1/b=hello/c=3.14 - * hdfs://:/path/to/partition/a=2/b=world/c=6.28 - * }}} - * it returns: - * {{{ - * PartitionSpec( - * partitionColumns = StructType( - * StructField(name = "a", dataType = IntegerType, nullable = true), - * StructField(name = "b", dataType = StringType, nullable = true), - * StructField(name = "c", dataType = DoubleType, nullable = true)), - * partitions = Seq( - * Partition( - * values = Row(1, "hello", 3.14), - * path = "hdfs://:/path/to/partition/a=1/b=hello/c=3.14"), - * Partition( - * values = Row(2, "world", 6.28), - * path = "hdfs://:/path/to/partition/a=2/b=world/c=6.28"))) - * }}} - */ - private[parquet] def parsePartitions( - paths: Seq[Path], - defaultPartitionName: String): PartitionSpec = { - val partitionValues = resolvePartitions(paths.map(parsePartition(_, defaultPartitionName))) - val fields = { - val (PartitionValues(columnNames, literals)) = partitionValues.head - columnNames.zip(literals).map { case (name, Literal(_, dataType)) => - StructField(name, dataType, nullable = true) - } - } - - val partitions = partitionValues.zip(paths).map { - case (PartitionValues(_, literals), path) => - Partition(Row(literals.map(_.value): _*), path.toString) - } - - PartitionSpec(StructType(fields), partitions) - } - - /** - * Parses a single partition, returns column names and values of each partition column. 
For - * example, given: - * {{{ - * path = hdfs://:/path/to/partition/a=42/b=hello/c=3.14 - * }}} - * it returns: - * {{{ - * PartitionValues( - * Seq("a", "b", "c"), - * Seq( - * Literal.create(42, IntegerType), - * Literal.create("hello", StringType), - * Literal.create(3.14, FloatType))) - * }}} - */ - private[parquet] def parsePartition( - path: Path, - defaultPartitionName: String): PartitionValues = { - val columns = ArrayBuffer.empty[(String, Literal)] - // Old Hadoop versions don't have `Path.isRoot` - var finished = path.getParent == null - var chopped = path - - while (!finished) { - val maybeColumn = parsePartitionColumn(chopped.getName, defaultPartitionName) - maybeColumn.foreach(columns += _) - chopped = chopped.getParent - finished = maybeColumn.isEmpty || chopped.getParent == null - } - - val (columnNames, values) = columns.reverse.unzip - PartitionValues(columnNames, values) - } - - private def parsePartitionColumn( - columnSpec: String, - defaultPartitionName: String): Option[(String, Literal)] = { - val equalSignIndex = columnSpec.indexOf('=') - if (equalSignIndex == -1) { - None - } else { - val columnName = columnSpec.take(equalSignIndex) - assert(columnName.nonEmpty, s"Empty partition column name in '$columnSpec'") - - val rawColumnValue = columnSpec.drop(equalSignIndex + 1) - assert(rawColumnValue.nonEmpty, s"Empty partition column value in '$columnSpec'") - - val literal = inferPartitionColumnValue(rawColumnValue, defaultPartitionName) - Some(columnName -> literal) - } - } - - /** - * Resolves possible type conflicts between partitions by up-casting "lower" types. The up- - * casting order is: - * {{{ - * NullType -> - * IntegerType -> LongType -> - * FloatType -> DoubleType -> DecimalType.Unlimited -> - * StringType - * }}} - */ - private[parquet] def resolvePartitions(values: Seq[PartitionValues]): Seq[PartitionValues] = { - // Column names of all partitions must match - val distinctPartitionsColNames = values.map(_.columnNames).distinct - assert(distinctPartitionsColNames.size == 1, { - val list = distinctPartitionsColNames.mkString("\t", "\n", "") - s"Conflicting partition column names detected:\n$list" - }) - - // Resolves possible type conflicts for each column - val columnCount = values.head.columnNames.size - val resolvedValues = (0 until columnCount).map { i => - resolveTypeConflicts(values.map(_.literals(i))) - } - - // Fills resolved literals back to each partition - values.zipWithIndex.map { case (d, index) => - d.copy(literals = resolvedValues.map(_(index))) - } - } - - /** - * Converts a string to a `Literal` with automatic type inference. Currently only supports - * [[IntegerType]], [[LongType]], [[FloatType]], [[DoubleType]], [[DecimalType.Unlimited]], and - * [[StringType]]. 
- */ - private[parquet] def inferPartitionColumnValue( - raw: String, - defaultPartitionName: String): Literal = { - // First tries integral types - Try(Literal.create(Integer.parseInt(raw), IntegerType)) - .orElse(Try(Literal.create(JLong.parseLong(raw), LongType))) - // Then falls back to fractional types - .orElse(Try(Literal.create(JFloat.parseFloat(raw), FloatType))) - .orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType))) - .orElse(Try(Literal.create(new JBigDecimal(raw), DecimalType.Unlimited))) - // Then falls back to string - .getOrElse { - if (raw == defaultPartitionName) Literal.create(null, NullType) - else Literal.create(raw, StringType) - } - } - - private val upCastingOrder: Seq[DataType] = - Seq(NullType, IntegerType, LongType, FloatType, DoubleType, DecimalType.Unlimited, StringType) - - /** - * Given a collection of [[Literal]]s, resolves possible type conflicts by up-casting "lower" - * types. - */ - private def resolveTypeConflicts(literals: Seq[Literal]): Seq[Literal] = { - val desiredType = { - val topType = literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_)) - // Falls back to string if all values of this column are null or empty string - if (topType == NullType) StringType else topType - } - - literals.map { case l @ Literal(_, dataType) => - Literal.create(Cast(l, desiredType).eval(), desiredType) - } - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala index b3d71f687a60a..a5410cda0fe6b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala @@ -17,20 +17,25 @@ package org.apache.spark.sql.sources +import org.apache.hadoop.fs.Path + +import org.apache.spark.Logging +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.types.{UTF8String, StringType} -import org.apache.spark.sql.{Row, Strategy, execution, sources} +import org.apache.spark.sql.types.{StructType, UTF8String, StringType} +import org.apache.spark.sql._ /** * A Strategy for planning scans over data sources defined using the sources API. 
*/ -private[sql] object DataSourceStrategy extends Strategy { +private[sql] object DataSourceStrategy extends Strategy with Logging { def apply(plan: LogicalPlan): Seq[execution.SparkPlan] = plan match { case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: CatalystScan)) => pruneFilterProjectRaw( @@ -53,6 +58,51 @@ private[sql] object DataSourceStrategy extends Strategy { filters, (a, _) => t.buildScan(a)) :: Nil + // Scanning partitioned FSBasedRelation + case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: FSBasedRelation)) + if t.partitionSpec.partitionColumns.nonEmpty => + val selectedPartitions = prunePartitions(filters, t.partitionSpec).toArray + + logInfo { + val total = t.partitionSpec.partitions.length + val selected = selectedPartitions.length + val percentPruned = (1 - total.toDouble / selected.toDouble) * 100 + s"Selected $selected partitions out of $total, pruned $percentPruned% partitions." + } + + // Only pushes down predicates that do not reference partition columns. + val pushedFilters = { + val partitionColumnNames = t.partitionSpec.partitionColumns.map(_.name).toSet + filters.filter { f => + val referencedColumnNames = f.references.map(_.name).toSet + referencedColumnNames.intersect(partitionColumnNames).isEmpty + } + } + + buildPartitionedTableScan( + l, + projectList, + pushedFilters, + t.partitionSpec.partitionColumns, + selectedPartitions) :: Nil + + // Scanning non-partitioned FSBasedRelation + case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: FSBasedRelation)) => + val inputPaths = t.paths.map(new Path(_)).flatMap { path => + val fs = path.getFileSystem(t.sqlContext.sparkContext.hadoopConfiguration) + val qualifiedPath = path.makeQualified(fs.getUri, fs.getWorkingDirectory) + SparkHadoopUtil.get.listLeafStatuses(fs, qualifiedPath).map(_.getPath).filterNot { path => + val name = path.getName + name.startsWith("_") || name.startsWith(".") + }.map(fs.makeQualified(_).toString) + } + + pruneFilterProject( + l, + projectList, + filters, + (a, f) => t.buildScan(a, f, inputPaths)) :: Nil + case l @ LogicalRelation(t: TableScan) => createPhysicalRDD(l.relation, l.output, t.buildScan()) :: Nil @@ -60,9 +110,144 @@ private[sql] object DataSourceStrategy extends Strategy { l @ LogicalRelation(t: InsertableRelation), part, query, overwrite, false) if part.isEmpty => execution.ExecutedCommand(InsertIntoDataSource(l, query, overwrite)) :: Nil + case i @ logical.InsertIntoTable( + l @ LogicalRelation(t: FSBasedRelation), part, query, overwrite, false) if part.isEmpty => + val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append + execution.ExecutedCommand( + InsertIntoFSBasedRelation(t, query, Array.empty[String], mode)) :: Nil + case _ => Nil } + private def buildPartitionedTableScan( + logicalRelation: LogicalRelation, + projections: Seq[NamedExpression], + filters: Seq[Expression], + partitionColumns: StructType, + partitions: Array[Partition]) = { + val output = projections.map(_.toAttribute) + val relation = logicalRelation.relation.asInstanceOf[FSBasedRelation] + + // Builds RDD[Row]s for each selected partition. 
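An editorial illustration, not from the patch, of how this strategy splits predicates for a relation partitioned by a hypothetical `date` column: the predicate on `date` only drives prunePartitions, while the remaining predicate is handed to buildScan() as a pushed-down filter because it references no partition columns.

// `events` is assumed to be a DataFrame backed by a partitioned FSBasedRelation.
val hits = events
  .filter(events("date") === "2015-05-12")   // references a partition column: used for pruning only
  .filter(events("clicks") > 10)             // no partition columns referenced: pushed down to buildScan()
  .count()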
+ val perPartitionRows = partitions.map { case Partition(partitionValues, dir) => + // Paths to all data files within this partition + val dataFilePaths = { + val dirPath = new Path(dir) + val fs = dirPath.getFileSystem(SparkHadoopUtil.get.conf) + fs.listStatus(dirPath).map(_.getPath).filterNot { path => + val name = path.getName + name.startsWith("_") || name.startsWith(".") + }.map(fs.makeQualified(_).toString) + } + + // The table scan operator (PhysicalRDD) which retrieves required columns from data files. + // Notice that the schema of data files, represented by `relation.dataSchema`, may contain + // some partition column(s). + val scan = + pruneFilterProject( + logicalRelation, + projections, + filters, + (requiredColumns, filters) => { + val partitionColNames = partitionColumns.fieldNames + + // Don't scan any partition columns to save I/O. Here we are being optimistic and + // assuming partition columns data stored in data files are always consistent with those + // partition values encoded in partition directory paths. + val nonPartitionColumns = requiredColumns.filterNot(partitionColNames.contains) + val dataRows = relation.buildScan(nonPartitionColumns, filters, dataFilePaths) + + // Merges data values with partition values. + mergeWithPartitionValues( + relation.schema, + requiredColumns, + partitionColNames, + partitionValues, + dataRows) + }) + + scan.execute() + } + + val unionedRows = perPartitionRows.reduceOption(_ ++ _).getOrElse { + relation.sqlContext.emptyResult + } + + createPhysicalRDD(logicalRelation.relation, output, unionedRows) + } + + private def mergeWithPartitionValues( + schema: StructType, + requiredColumns: Array[String], + partitionColumns: Array[String], + partitionValues: Row, + dataRows: RDD[Row]): RDD[Row] = { + val nonPartitionColumns = requiredColumns.filterNot(partitionColumns.contains) + + // If output columns contain any partition column(s), we need to merge scanned data + // columns and requested partition columns to form the final result. + if (!requiredColumns.sameElements(nonPartitionColumns)) { + val mergers = requiredColumns.zipWithIndex.map { case (name, index) => + // To see whether the `index`-th column is a partition column... + val i = partitionColumns.indexOf(name) + if (i != -1) { + // If yes, gets column value from partition values. + (mutableRow: MutableRow, dataRow: expressions.Row, ordinal: Int) => { + mutableRow(ordinal) = partitionValues(i) + } + } else { + // Otherwise, inherits the value from scanned data. 
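A tiny editorial illustration of the merge performed here, with made-up values: if requiredColumns is ["name", "year"] and `year` is a partition column, the data file contributes `name` while the directory name contributes `year`.

import org.apache.spark.sql.Row

val partitionValues = Row(2015)                    // parsed from a path like .../year=2015
val dataRow = Row("alice")                         // the columns actually read from the data file
val merged = Row(dataRow(0), partitionValues(0))   // Row("alice", 2015), in requiredColumns order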
+ val i = nonPartitionColumns.indexOf(name) + (mutableRow: MutableRow, dataRow: expressions.Row, ordinal: Int) => { + mutableRow(ordinal) = dataRow(i) + } + } + } + + dataRows.mapPartitions { iterator => + val dataTypes = requiredColumns.map(schema(_).dataType) + val mutableRow = new SpecificMutableRow(dataTypes) + iterator.map { dataRow => + var i = 0 + while (i < mutableRow.length) { + mergers(i)(mutableRow, dataRow, i) + i += 1 + } + mutableRow.asInstanceOf[expressions.Row] + } + } + } else { + dataRows + } + } + + protected def prunePartitions( + predicates: Seq[Expression], + partitionSpec: PartitionSpec): Seq[Partition] = { + val PartitionSpec(partitionColumns, partitions) = partitionSpec + val partitionColumnNames = partitionColumns.map(_.name).toSet + val partitionPruningPredicates = predicates.filter { + _.references.map(_.name).toSet.subsetOf(partitionColumnNames) + } + + if (partitionPruningPredicates.nonEmpty) { + val predicate = + partitionPruningPredicates + .reduceOption(expressions.And) + .getOrElse(Literal(true)) + + val boundPredicate = InterpretedPredicate.create(predicate.transform { + case a: AttributeReference => + val index = partitionColumns.indexWhere(a.name == _.name) + BoundReference(index, partitionColumns(index).dataType, nullable = true) + }) + + partitions.filter { case Partition(values, _) => boundPredicate(values) } + } else { + partitions + } + } + // Based on Public API. protected def pruneFilterProject( relation: LogicalRelation, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala new file mode 100644 index 0000000000000..d30f7f65e21c0 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.sources + +import java.lang.{Double => JDouble, Float => JFloat, Long => JLong} +import java.math.{BigDecimal => JBigDecimal} + +import scala.collection.mutable.ArrayBuffer +import scala.util.Try + +import com.google.common.cache.{CacheBuilder, Cache} +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} +import org.apache.spark.sql.types._ + +private[sql] case class Partition(values: Row, path: String) + +private[sql] case class PartitionSpec(partitionColumns: StructType, partitions: Seq[Partition]) + +private[sql] object PartitioningUtils { + private[sql] case class PartitionValues(columnNames: Seq[String], literals: Seq[Literal]) { + require(columnNames.size == literals.size) + } + + /** + * Given a group of qualified paths, tries to parse them and returns a partition specification. + * For example, given: + * {{{ + * hdfs://:/path/to/partition/a=1/b=hello/c=3.14 + * hdfs://:/path/to/partition/a=2/b=world/c=6.28 + * }}} + * it returns: + * {{{ + * PartitionSpec( + * partitionColumns = StructType( + * StructField(name = "a", dataType = IntegerType, nullable = true), + * StructField(name = "b", dataType = StringType, nullable = true), + * StructField(name = "c", dataType = DoubleType, nullable = true)), + * partitions = Seq( + * Partition( + * values = Row(1, "hello", 3.14), + * path = "hdfs://:/path/to/partition/a=1/b=hello/c=3.14"), + * Partition( + * values = Row(2, "world", 6.28), + * path = "hdfs://:/path/to/partition/a=2/b=world/c=6.28"))) + * }}} + */ + private[sql] def parsePartitions( + paths: Seq[Path], + defaultPartitionName: String): PartitionSpec = { + val partitionValues = resolvePartitions(paths.map(parsePartition(_, defaultPartitionName))) + val fields = { + val (PartitionValues(columnNames, literals)) = partitionValues.head + columnNames.zip(literals).map { case (name, Literal(_, dataType)) => + StructField(name, dataType, nullable = true) + } + } + + val partitions = partitionValues.zip(paths).map { + case (PartitionValues(_, literals), path) => + Partition(Row(literals.map(_.value): _*), path.toString) + } + + PartitionSpec(StructType(fields), partitions) + } + + /** + * Parses a single partition, returns column names and values of each partition column. 
For + * example, given: + * {{{ + * path = hdfs://:/path/to/partition/a=42/b=hello/c=3.14 + * }}} + * it returns: + * {{{ + * PartitionValues( + * Seq("a", "b", "c"), + * Seq( + * Literal.create(42, IntegerType), + * Literal.create("hello", StringType), + * Literal.create(3.14, FloatType))) + * }}} + */ + private[sql] def parsePartition( + path: Path, + defaultPartitionName: String): PartitionValues = { + val columns = ArrayBuffer.empty[(String, Literal)] + // Old Hadoop versions don't have `Path.isRoot` + var finished = path.getParent == null + var chopped = path + + while (!finished) { + val maybeColumn = parsePartitionColumn(chopped.getName, defaultPartitionName) + maybeColumn.foreach(columns += _) + chopped = chopped.getParent + finished = maybeColumn.isEmpty || chopped.getParent == null + } + + val (columnNames, values) = columns.reverse.unzip + PartitionValues(columnNames, values) + } + + private def parsePartitionColumn( + columnSpec: String, + defaultPartitionName: String): Option[(String, Literal)] = { + val equalSignIndex = columnSpec.indexOf('=') + if (equalSignIndex == -1) { + None + } else { + val columnName = columnSpec.take(equalSignIndex) + assert(columnName.nonEmpty, s"Empty partition column name in '$columnSpec'") + + val rawColumnValue = columnSpec.drop(equalSignIndex + 1) + assert(rawColumnValue.nonEmpty, s"Empty partition column value in '$columnSpec'") + + val literal = inferPartitionColumnValue(rawColumnValue, defaultPartitionName) + Some(columnName -> literal) + } + } + + /** + * Resolves possible type conflicts between partitions by up-casting "lower" types. The up- + * casting order is: + * {{{ + * NullType -> + * IntegerType -> LongType -> + * FloatType -> DoubleType -> DecimalType.Unlimited -> + * StringType + * }}} + */ + private[sql] def resolvePartitions(values: Seq[PartitionValues]): Seq[PartitionValues] = { + // Column names of all partitions must match + val distinctPartitionsColNames = values.map(_.columnNames).distinct + assert(distinctPartitionsColNames.size == 1, { + val list = distinctPartitionsColNames.mkString("\t", "\n", "") + s"Conflicting partition column names detected:\n$list" + }) + + // Resolves possible type conflicts for each column + val columnCount = values.head.columnNames.size + val resolvedValues = (0 until columnCount).map { i => + resolveTypeConflicts(values.map(_.literals(i))) + } + + // Fills resolved literals back to each partition + values.zipWithIndex.map { case (d, index) => + d.copy(literals = resolvedValues.map(_(index))) + } + } + + /** + * Converts a string to a `Literal` with automatic type inference. Currently only supports + * [[IntegerType]], [[LongType]], [[FloatType]], [[DoubleType]], [[DecimalType.Unlimited]], and + * [[StringType]]. 
+ */ + private[sql] def inferPartitionColumnValue( + raw: String, + defaultPartitionName: String): Literal = { + // First tries integral types + Try(Literal.create(Integer.parseInt(raw), IntegerType)) + .orElse(Try(Literal.create(JLong.parseLong(raw), LongType))) + // Then falls back to fractional types + .orElse(Try(Literal.create(JFloat.parseFloat(raw), FloatType))) + .orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType))) + .orElse(Try(Literal.create(new JBigDecimal(raw), DecimalType.Unlimited))) + // Then falls back to string + .getOrElse { + if (raw == defaultPartitionName) Literal.create(null, NullType) + else Literal.create(raw, StringType) + } + } + + private val upCastingOrder: Seq[DataType] = + Seq(NullType, IntegerType, LongType, FloatType, DoubleType, DecimalType.Unlimited, StringType) + + /** + * Given a collection of [[Literal]]s, resolves possible type conflicts by up-casting "lower" + * types. + */ + private def resolveTypeConflicts(literals: Seq[Literal]): Seq[Literal] = { + val desiredType = { + val topType = literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_)) + // Falls back to string if all values of this column are null or empty string + if (topType == NullType) StringType else topType + } + + literals.map { case l @ Literal(_, dataType) => + Literal.create(Cast(l, desiredType).eval(), desiredType) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index dbdb0d39c26a1..127133bfaf662 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -14,12 +14,28 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package org.apache.spark.sql.sources -import org.apache.spark.sql.{DataFrame, SQLContext} +import java.util.Date + +import scala.collection.mutable + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter, FileOutputFormat} +import org.apache.hadoop.util.Shell +import parquet.hadoop.util.ContextUtil + +import org.apache.spark._ +import org.apache.spark.mapred.SparkHadoopMapRedUtil +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil +import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateProjection import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.RunnableCommand +import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} private[sql] case class InsertIntoDataSource( logicalRelation: LogicalRelation, @@ -41,3 +57,391 @@ private[sql] case class InsertIntoDataSource( Seq.empty[Row] } } + +private[sql] case class InsertIntoFSBasedRelation( + @transient relation: FSBasedRelation, + @transient query: LogicalPlan, + partitionColumns: Array[String], + mode: SaveMode) + extends RunnableCommand { + + override def run(sqlContext: SQLContext): Seq[Row] = { + require( + relation.paths.length == 1, + s"Cannot write to multiple destinations: ${relation.paths.mkString(",")}") + + val hadoopConf = sqlContext.sparkContext.hadoopConfiguration + val outputPath = new Path(relation.paths.head) + val fs = outputPath.getFileSystem(hadoopConf) + val qualifiedOutputPath = outputPath.makeQualified(fs.getUri, fs.getWorkingDirectory) + + val doInsertion = (mode, fs.exists(qualifiedOutputPath)) match { + case (SaveMode.ErrorIfExists, true) => + sys.error(s"path $qualifiedOutputPath already exists.") + case (SaveMode.Overwrite, true) => + fs.delete(qualifiedOutputPath, true) + true + case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => + true + case (SaveMode.Ignore, exists) => + !exists + } + + if (doInsertion) { + val job = Job.getInstance(hadoopConf) + job.setOutputKeyClass(classOf[Void]) + job.setOutputValueClass(classOf[Row]) + FileOutputFormat.setOutputPath(job, qualifiedOutputPath) + + val df = sqlContext.createDataFrame( + DataFrame(sqlContext, query).queryExecution.toRdd, + relation.schema, + needsConversion = false) + + if (partitionColumns.isEmpty) { + insert(new DefaultWriterContainer(relation, job), df) + } else { + val writerContainer = new DynamicPartitionWriterContainer( + relation, job, partitionColumns, "__HIVE_DEFAULT_PARTITION__") + insertWithDynamicPartitions(sqlContext, writerContainer, df, partitionColumns) + } + } + + Seq.empty[Row] + } + + private def insert(writerContainer: BaseWriterContainer, df: DataFrame): Unit = { + // Uses local vals for serialization + val needsConversion = relation.needConversion + val dataSchema = relation.dataSchema + + try { + writerContainer.driverSideSetup() + df.sqlContext.sparkContext.runJob(df.queryExecution.executedPlan.execute(), writeRows _) + writerContainer.commitJob() + relation.refresh() + } catch { case cause: Throwable => + writerContainer.abortJob() + throw new SparkException("Job aborted.", cause) + } + + def writeRows(taskContext: TaskContext, iterator: Iterator[Row]): Unit = { + writerContainer.executorSideSetup(taskContext) + + try { + if (needsConversion) { + val converter = CatalystTypeConverters.createToScalaConverter(dataSchema) + while 
(iterator.hasNext) { + val row = converter(iterator.next()).asInstanceOf[Row] + writerContainer.outputWriterForRow(row).write(row) + } + } else { + while (iterator.hasNext) { + val row = iterator.next() + writerContainer.outputWriterForRow(row).write(row) + } + } + writerContainer.commitTask() + } catch { case cause: Throwable => + writerContainer.abortTask() + throw new SparkException("Task failed while writing rows.", cause) + } + } + } + + private def insertWithDynamicPartitions( + sqlContext: SQLContext, + writerContainer: BaseWriterContainer, + df: DataFrame, + partitionColumns: Array[String]): Unit = { + // Uses a local val for serialization + val needsConversion = relation.needConversion + val dataSchema = relation.dataSchema + + require( + df.schema == relation.schema, + s"""DataFrame must have the same schema as the relation to which is inserted. + |DataFrame schema: ${df.schema} + |Relation schema: ${relation.schema} + """.stripMargin) + + val partitionColumnsInSpec = relation.partitionColumns.fieldNames + require( + partitionColumnsInSpec.sameElements(partitionColumns), + s"""Partition columns mismatch. + |Expected: ${partitionColumnsInSpec.mkString(", ")} + |Actual: ${partitionColumns.mkString(", ")} + """.stripMargin) + + val output = df.queryExecution.executedPlan.output + val (partitionOutput, dataOutput) = output.partition(a => partitionColumns.contains(a.name)) + val codegenEnabled = df.sqlContext.conf.codegenEnabled + + try { + writerContainer.driverSideSetup() + df.sqlContext.sparkContext.runJob(df.queryExecution.executedPlan.execute(), writeRows _) + writerContainer.commitJob() + relation.refresh() + } catch { case cause: Throwable => + logError("Aborting job.", cause) + writerContainer.abortJob() + throw new SparkException("Job aborted.", cause) + } + + def writeRows(taskContext: TaskContext, iterator: Iterator[Row]): Unit = { + writerContainer.executorSideSetup(taskContext) + + val partitionProj = newProjection(codegenEnabled, partitionOutput, output) + val dataProj = newProjection(codegenEnabled, dataOutput, output) + + if (needsConversion) { + val converter = CatalystTypeConverters.createToScalaConverter(dataSchema) + while (iterator.hasNext) { + val row = iterator.next() + val partitionPart = partitionProj(row) + val dataPart = dataProj(row) + val convertedDataPart = converter(dataPart).asInstanceOf[Row] + writerContainer.outputWriterForRow(partitionPart).write(convertedDataPart) + } + } else { + while (iterator.hasNext) { + val row = iterator.next() + val partitionPart = partitionProj(row) + val dataPart = dataProj(row) + writerContainer.outputWriterForRow(partitionPart).write(dataPart) + } + } + + writerContainer.commitTask() + } + } + + // This is copied from SparkPlan, probably should move this to a more general place. + private def newProjection( + codegenEnabled: Boolean, + expressions: Seq[Expression], + inputSchema: Seq[Attribute]): Projection = { + log.debug( + s"Creating Projection: $expressions, inputSchema: $inputSchema, codegen:$codegenEnabled") + if (codegenEnabled) { + GenerateProjection.generate(expressions, inputSchema) + } else { + new InterpretedProjection(expressions, inputSchema) + } + } +} + +private[sql] abstract class BaseWriterContainer( + @transient val relation: FSBasedRelation, + @transient job: Job) + extends SparkHadoopMapReduceUtil + with Logging + with Serializable { + + protected val serializableConf = new SerializableWritable(ContextUtil.getConfiguration(job)) + + // This is only used on driver side. 
+ @transient private val jobContext: JobContext = job + + // The following fields are initialized and used on both driver and executor side. + @transient protected var outputCommitter: FileOutputCommitter = _ + @transient private var jobId: JobID = _ + @transient private var taskId: TaskID = _ + @transient private var taskAttemptId: TaskAttemptID = _ + @transient protected var taskAttemptContext: TaskAttemptContext = _ + + protected val outputPath: String = { + assert( + relation.paths.length == 1, + s"Cannot write to multiple destinations: ${relation.paths.mkString(",")}") + relation.paths.head + } + + protected val dataSchema = relation.dataSchema + + protected val outputWriterClass: Class[_ <: OutputWriter] = relation.outputWriterClass + + private var outputFormatClass: Class[_ <: OutputFormat[_, _]] = _ + + def driverSideSetup(): Unit = { + setupIDs(0, 0, 0) + setupConf() + taskAttemptContext = newTaskAttemptContext(serializableConf.value, taskAttemptId) + relation.prepareForWrite(job) + outputFormatClass = job.getOutputFormatClass + outputCommitter = newOutputCommitter(taskAttemptContext) + outputCommitter.setupJob(jobContext) + } + + def executorSideSetup(taskContext: TaskContext): Unit = { + setupIDs(taskContext.stageId(), taskContext.partitionId(), taskContext.attemptNumber()) + setupConf() + taskAttemptContext = newTaskAttemptContext(serializableConf.value, taskAttemptId) + outputCommitter = newOutputCommitter(taskAttemptContext) + outputCommitter.setupTask(taskAttemptContext) + initWriters() + } + + private def newOutputCommitter(context: TaskAttemptContext): FileOutputCommitter = { + outputFormatClass.newInstance().getOutputCommitter(context) match { + case f: FileOutputCommitter => f + case f => sys.error( + s"FileOutputCommitter or its subclass is expected, but got a ${f.getClass.getName}.") + } + } + + private def setupIDs(jobId: Int, splitId: Int, attemptId: Int): Unit = { + this.jobId = SparkHadoopWriter.createJobID(new Date, jobId) + this.taskId = new TaskID(this.jobId, true, splitId) + this.taskAttemptId = new TaskAttemptID(taskId, attemptId) + } + + private def setupConf(): Unit = { + serializableConf.value.set("mapred.job.id", jobId.toString) + serializableConf.value.set("mapred.tip.id", taskAttemptId.getTaskID.toString) + serializableConf.value.set("mapred.task.id", taskAttemptId.toString) + serializableConf.value.setBoolean("mapred.task.is.map", true) + serializableConf.value.setInt("mapred.task.partition", 0) + } + + // Called on executor side when writing rows + def outputWriterForRow(row: Row): OutputWriter + + protected def initWriters(): Unit + + def commitTask(): Unit = { + SparkHadoopMapRedUtil.commitTask( + outputCommitter, taskAttemptContext, jobId.getId, taskId.getId, taskAttemptId.getId) + } + + def abortTask(): Unit = { + outputCommitter.abortTask(taskAttemptContext) + logError(s"Task attempt $taskAttemptId aborted.") + } + + def commitJob(): Unit = { + outputCommitter.commitJob(jobContext) + logInfo(s"Job $jobId committed.") + } + + def abortJob(): Unit = { + outputCommitter.abortJob(jobContext, JobStatus.State.FAILED) + logError(s"Job $jobId aborted.") + } +} + +private[sql] class DefaultWriterContainer( + @transient relation: FSBasedRelation, + @transient job: Job) + extends BaseWriterContainer(relation, job) { + + @transient private var writer: OutputWriter = _ + + override protected def initWriters(): Unit = { + writer = outputWriterClass.newInstance() + writer.init(outputCommitter.getWorkPath.toString, dataSchema, taskAttemptContext) + } + + override 
def outputWriterForRow(row: Row): OutputWriter = writer + + override def commitTask(): Unit = { + writer.close() + super.commitTask() + } + + override def abortTask(): Unit = { + writer.close() + super.abortTask() + } +} + +private[sql] class DynamicPartitionWriterContainer( + @transient relation: FSBasedRelation, + @transient job: Job, + partitionColumns: Array[String], + defaultPartitionName: String) + extends BaseWriterContainer(relation, job) { + + // All output writers are created on executor side. + @transient protected var outputWriters: mutable.Map[String, OutputWriter] = _ + + override protected def initWriters(): Unit = { + outputWriters = mutable.Map.empty[String, OutputWriter] + } + + override def outputWriterForRow(row: Row): OutputWriter = { + val partitionPath = partitionColumns.zip(row.toSeq).map { case (col, rawValue) => + val string = if (rawValue == null) null else String.valueOf(rawValue) + val valueString = if (string == null || string.isEmpty) { + defaultPartitionName + } else { + DynamicPartitionWriterContainer.escapePathName(string) + } + s"/$col=$valueString" + }.mkString + + outputWriters.getOrElseUpdate(partitionPath, { + val path = new Path(outputCommitter.getWorkPath, partitionPath.stripPrefix(Path.SEPARATOR)) + val writer = outputWriterClass.newInstance() + writer.init(path.toString, dataSchema, taskAttemptContext) + writer + }) + } + + override def commitTask(): Unit = { + outputWriters.values.foreach(_.close()) + super.commitTask() + } + + override def abortTask(): Unit = { + outputWriters.values.foreach(_.close()) + super.abortTask() + } +} + +private[sql] object DynamicPartitionWriterContainer { + ////////////////////////////////////////////////////////////////////////////////////////////////// + // The following string escaping code is mainly copied from Hive (o.a.h.h.common.FileUtils). + ////////////////////////////////////////////////////////////////////////////////////////////////// + + val charToEscape = { + val bitSet = new java.util.BitSet(128) + + /** + * ASCII 01-1F are HTTP control characters that need to be escaped. + * \u000A and \u000D are \n and \r, respectively. 
+ */ + val clist = Array( + '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', '\u0009', + '\n', '\u000B', '\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', + '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', + '\u001D', '\u001E', '\u001F', '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', + '{', '[', ']', '^') + + clist.foreach(bitSet.set(_)) + + if (Shell.WINDOWS) { + Array(' ', '<', '>', '|').foreach(bitSet.set(_)) + } + + bitSet + } + + def needsEscaping(c: Char): Boolean = { + c >= 0 && c < charToEscape.size() && charToEscape.get(c) + } + + def escapePathName(path: String): String = { + val builder = new StringBuilder() + path.foreach { c => + if (DynamicPartitionWriterContainer.needsEscaping(c)) { + builder.append('%') + builder.append(f"${c.asInstanceOf[Int]}%02x") + } else { + builder.append(c) + } + } + + builder.toString() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index 06c64f2bdd59e..595c5eb40e295 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -17,18 +17,20 @@ package org.apache.spark.sql.sources -import scala.language.existentials +import scala.language.{existentials, implicitConversions} import scala.util.matching.Regex -import scala.language.implicitConversions + +import org.apache.hadoop.fs.Path import org.apache.spark.Logging -import org.apache.spark.sql.{AnalysisException, SaveMode, DataFrame, SQLContext} -import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.catalyst.AbstractSparkSQLParser import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Row} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, DataFrame, SQLContext, SaveMode} import org.apache.spark.util.Utils /** @@ -111,6 +113,7 @@ private[sql] class DDLParser( CreateTableUsingAsSelect(tableName, provider, temp.isDefined, + Array.empty[String], mode, options, queryPlan) @@ -157,7 +160,7 @@ private[sql] class DDLParser( protected lazy val className: Parser[String] = repsep(ident, ".") ^^ { case s => s.mkString(".")} override implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch( - s"identifier matching regex ${regex}", { + s"identifier matching regex $regex", { case lexical.Identifier(str) if regex.unapplySeq(str).isDefined => str case lexical.Keyword(str) if regex.unapplySeq(str).isDefined => str } @@ -214,6 +217,7 @@ private[sql] object ResolvedDataSource { def apply( sqlContext: SQLContext, userSpecifiedSchema: Option[StructType], + partitionColumns: Array[String], provider: String, options: Map[String, String]): ResolvedDataSource = { val clazz: Class[_] = lookupDataSource(provider) @@ -222,6 +226,27 @@ private[sql] object ResolvedDataSource { case Some(schema: StructType) => clazz.newInstance() match { case dataSource: SchemaRelationProvider => dataSource.createRelation(sqlContext, new CaseInsensitiveMap(options), schema) + case dataSource: FSBasedRelationProvider => + val maybePartitionsSchema = if (partitionColumns.isEmpty) { + None + } else { + Some(partitionColumnsSchema(schema, 
partitionColumns)) + } + + val caseInsensitiveOptions= new CaseInsensitiveMap(options) + val paths = { + val patternPath = new Path(caseInsensitiveOptions("path")) + SparkHadoopUtil.get.globPath(patternPath).map(_.toString).toArray + } + + val dataSchema = StructType(schema.filterNot(f => partitionColumns.contains(f.name))) + + dataSource.createRelation( + sqlContext, + paths, + Some(schema), + maybePartitionsSchema, + caseInsensitiveOptions) case dataSource: org.apache.spark.sql.sources.RelationProvider => throw new AnalysisException(s"$className does not allow user-specified schemas.") case _ => @@ -231,20 +256,39 @@ private[sql] object ResolvedDataSource { case None => clazz.newInstance() match { case dataSource: RelationProvider => dataSource.createRelation(sqlContext, new CaseInsensitiveMap(options)) + case dataSource: FSBasedRelationProvider => + val caseInsensitiveOptions = new CaseInsensitiveMap(options) + val paths = { + val patternPath = new Path(caseInsensitiveOptions("path")) + SparkHadoopUtil.get.globPath(patternPath).map(_.toString).toArray + } + dataSource.createRelation(sqlContext, paths, None, None, caseInsensitiveOptions) case dataSource: org.apache.spark.sql.sources.SchemaRelationProvider => throw new AnalysisException( s"A schema needs to be specified when using $className.") case _ => - throw new AnalysisException(s"$className is not a RelationProvider.") + throw new AnalysisException( + s"$className is neither a RelationProvider nor a FSBasedRelationProvider.") } } new ResolvedDataSource(clazz, relation) } + private def partitionColumnsSchema( + schema: StructType, + partitionColumns: Array[String]): StructType = { + StructType(partitionColumns.map { col => + schema.find(_.name == col).getOrElse { + throw new RuntimeException(s"Partition column $col not found in schema $schema") + } + }).asNullable + } + /** Create a [[ResolvedDataSource]] for saving the content of the given [[DataFrame]]. */ def apply( sqlContext: SQLContext, provider: String, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], data: DataFrame): ResolvedDataSource = { @@ -252,6 +296,31 @@ private[sql] object ResolvedDataSource { val relation = clazz.newInstance() match { case dataSource: CreatableRelationProvider => dataSource.createRelation(sqlContext, mode, options, data) + case dataSource: FSBasedRelationProvider => + // Don't glob path for the write path. The contracts here are: + // 1. Only one output path can be specified on the write path; + // 2. Output path must be a legal HDFS style file system path; + // 3. 
It's OK that the output path doesn't exist yet; + val caseInsensitiveOptions = new CaseInsensitiveMap(options) + val outputPath = { + val path = new Path(caseInsensitiveOptions("path")) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + path.makeQualified(fs.getUri, fs.getWorkingDirectory) + } + val dataSchema = StructType(data.schema.filterNot(f => partitionColumns.contains(f.name))) + val r = dataSource.createRelation( + sqlContext, + Array(outputPath.toString), + Some(dataSchema.asNullable), + Some(partitionColumnsSchema(data.schema, partitionColumns)), + caseInsensitiveOptions) + sqlContext.executePlan( + InsertIntoFSBasedRelation( + r, + data.logicalPlan, + partitionColumns.toArray, + mode)).toRdd + r case _ => sys.error(s"${clazz.getCanonicalName} does not allow create table as select.") } @@ -310,6 +379,7 @@ private[sql] case class CreateTableUsingAsSelect( tableName: String, provider: String, temporary: Boolean, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], child: LogicalPlan) extends UnaryNode { @@ -324,8 +394,9 @@ private[sql] case class CreateTempTableUsing( provider: String, options: Map[String, String]) extends RunnableCommand { - override def run(sqlContext: SQLContext): Seq[Row] = { - val resolved = ResolvedDataSource(sqlContext, userSpecifiedSchema, provider, options) + def run(sqlContext: SQLContext): Seq[Row] = { + val resolved = ResolvedDataSource( + sqlContext, userSpecifiedSchema, Array.empty[String], provider, options) sqlContext.registerDataFrameAsTable( DataFrame(sqlContext, LogicalRelation(resolved.relation)), tableName) Seq.empty @@ -335,13 +406,14 @@ private[sql] case class CreateTempTableUsing( private[sql] case class CreateTempTableUsingAsSelect( tableName: String, provider: String, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], query: LogicalPlan) extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { val df = DataFrame(sqlContext, query) - val resolved = ResolvedDataSource(sqlContext, provider, mode, options, df) + val resolved = ResolvedDataSource(sqlContext, provider, partitionColumns, mode, options, df) sqlContext.registerDataFrameAsTable( DataFrame(sqlContext, LogicalRelation(resolved.relation)), tableName) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index ca53dcdb92c52..5e010d21120f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -17,11 +17,19 @@ package org.apache.spark.sql.sources -import org.apache.spark.annotation.{Experimental, DeveloperApi} +import scala.util.Try + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} + +import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{SaveMode, DataFrame, Row, SQLContext} -import org.apache.spark.sql.catalyst.expressions.{Expression, Attribute} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{Row, _} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection +import org.apache.spark.sql.types.{StructField, StructType} /** * ::DeveloperApi:: @@ -78,6 
+86,41 @@ trait SchemaRelationProvider { schema: StructType): BaseRelation } +/** + * ::DeveloperApi:: + * Implemented by objects that produce relations for a specific kind of data source + * with a given schema and partitioned columns. When Spark SQL is given a DDL operation with a + * USING clause specified (to specify the implemented [[FSBasedRelationProvider]]), a user defined + * schema, and an optional list of partition columns, this interface is used to pass in the + * parameters specified by a user. + * + * Users may specify the fully qualified class name of a given data source. When that class is + * not found Spark SQL will append the class name `DefaultSource` to the path, allowing for + * less verbose invocation. For example, 'org.apache.spark.sql.json' would resolve to the + * data source 'org.apache.spark.sql.json.DefaultSource' + * + * A new instance of this class will be instantiated each time a DDL call is made. + * + * The difference between a [[RelationProvider]] and a [[FSBasedRelationProvider]] is + * that users need to provide a schema and a (possibly empty) list of partition columns when + * using an [[FSBasedRelationProvider]]. A relation provider can inherit both [[RelationProvider]] + * and [[FSBasedRelationProvider]] if it can support schema inference, user-specified + * schemas, and accessing partitioned relations. + */ +trait FSBasedRelationProvider { + /** + * Returns a new base relation with the given parameters, a user defined schema, and a list of + * partition columns. Note: the parameters' keywords are case insensitive and this insensitivity + * is enforced by the Map that is passed to the function. + */ + def createRelation( + sqlContext: SQLContext, + paths: Array[String], + schema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): FSBasedRelation +} + @DeveloperApi trait CreatableRelationProvider { /** @@ -207,3 +250,235 @@ trait InsertableRelation { trait CatalystScan { def buildScan(requiredColumns: Seq[Attribute], filters: Seq[Expression]): RDD[Row] } + +/** + * ::Experimental:: + * [[OutputWriter]] is used together with [[FSBasedRelation]] for persisting rows to the + * underlying file system. Subclasses of [[OutputWriter]] must provide a zero-argument constructor. + * An [[OutputWriter]] instance is created and initialized when a new output file is opened on + * executor side. This instance is used to persist rows to this single output file. + */ +@Experimental +abstract class OutputWriter { + /** + * Initializes this [[OutputWriter]] before any rows are persisted. + * + * @param path Path of the file to which this [[OutputWriter]] is supposed to write. Note that + * this may not point to the final output file. For example, `FileOutputFormat` writes to + * temporary directories and then merges written files back to the final destination. In + * this case, `path` points to a temporary output file under the temporary directory. + * @param dataSchema Schema of the rows to be written. Partition columns are not included in the + * schema if the corresponding relation is partitioned. + * @param context The Hadoop MapReduce task context. + */ + def init( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): Unit = () + + /** + * Persists a single row. Invoked on the executor side. When writing to dynamically partitioned + * tables, dynamic partition columns are not included in rows to be written. + */ + def write(row: Row): Unit + + /** + * Closes the [[OutputWriter]]. 
Invoked on the executor side after all rows are persisted, before + * the task output is committed. + */ + def close(): Unit +} + +/** + * ::Experimental:: + * A [[BaseRelation]] that provides much of the common code required for formats that store their + * data to an HDFS compatible filesystem. + * + * For the read path, similar to [[PrunedFilteredScan]], it can eliminate unneeded columns and + * filter using selected predicates before producing an RDD containing all matching tuples as + * [[Row]] objects. In addition, when reading from Hive style partitioned tables stored in file + * systems, it's able to discover partitioning information from the paths of input directories, and + * perform partition pruning before starting to read the data. Subclasses of [[FSBasedRelation]] must + * override one of the three `buildScan` methods to implement the read path. + * + * For the write path, it provides the ability to write to both non-partitioned and partitioned + * tables. Directory layout of the partitioned tables is compatible with Hive. + * + * @constructor This constructor is for internal uses only. The [[PartitionSpec]] argument is for + * implementing metastore table conversion. + * @param paths Base paths of this relation. For partitioned relations, it should be the root + * directories of all partition directories. + * @param maybePartitionSpec An [[FSBasedRelation]] can be created with an optional + * [[PartitionSpec]], so that partition discovery can be skipped. + */ +@Experimental +abstract class FSBasedRelation private[sql]( + val paths: Array[String], + maybePartitionSpec: Option[PartitionSpec]) + extends BaseRelation { + + /** + * Constructs an [[FSBasedRelation]]. + * + * @param paths Base paths of this relation. For partitioned relations, it should be the root + * directories of all partition directories. + * @param partitionColumns Partition columns of this relation. + */ + def this(paths: Array[String], partitionColumns: StructType) = + this(paths, { + if (partitionColumns.isEmpty) None + else Some(PartitionSpec(partitionColumns, Array.empty[Partition])) + }) + + /** + * Constructs an [[FSBasedRelation]]. + * + * @param paths Base paths of this relation. For partitioned relations, it should be the root + * directories of all partition directories. + */ + def this(paths: Array[String]) = this(paths, None) + + private val hadoopConf = new Configuration(sqlContext.sparkContext.hadoopConfiguration) + + private val codegenEnabled = sqlContext.conf.codegenEnabled + + private var _partitionSpec: PartitionSpec = maybePartitionSpec.map { spec => + spec.copy(partitionColumns = spec.partitionColumns.asNullable) + }.getOrElse { + if (sqlContext.conf.partitionDiscoveryEnabled()) { + discoverPartitions() + } else { + PartitionSpec(StructType(Nil), Array.empty[Partition]) + } + } + + private[sql] def partitionSpec: PartitionSpec = _partitionSpec + + /** + * Partition columns. Note that they are always nullable. 
+ */ + def partitionColumns: StructType = partitionSpec.partitionColumns + + private[sql] def refresh(): Unit = { + if (sqlContext.conf.partitionDiscoveryEnabled()) { + _partitionSpec = discoverPartitions() + } + } + + private def discoverPartitions(): PartitionSpec = { + val basePaths = paths.map(new Path(_)) + val leafDirs = basePaths.flatMap { path => + val fs = path.getFileSystem(hadoopConf) + Try(fs.getFileStatus(path.makeQualified(fs.getUri, fs.getWorkingDirectory))) + .filter(_.isDir) + .map(SparkHadoopUtil.get.listLeafDirStatuses(fs, _)) + .getOrElse(Seq.empty[FileStatus]) + }.map(_.getPath) + + if (leafDirs.nonEmpty) { + PartitioningUtils.parsePartitions(leafDirs, "__HIVE_DEFAULT_PARTITION__") + } else { + PartitionSpec(StructType(Array.empty[StructField]), Array.empty[Partition]) + } + } + + /** + * Schema of this relation. It consists of columns appearing in [[dataSchema]] and all partition + * columns not appearing in [[dataSchema]]. + */ + override lazy val schema: StructType = { + val dataSchemaColumnNames = dataSchema.map(_.name.toLowerCase).toSet + StructType(dataSchema ++ partitionSpec.partitionColumns.filterNot { column => + dataSchemaColumnNames.contains(column.name.toLowerCase) + }) + } + + /** + * Specifies schema of actual data files. For partitioned relations, if one or more partitioned + * columns are contained in the data files, they should also appear in `dataSchema`. + */ + def dataSchema: StructType + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + */ + def buildScan(inputPaths: Array[String]): RDD[Row] = { + throw new RuntimeException( + "At least one buildScan() method should be overridden to read the relation.") + } + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * @param requiredColumns Required columns. + * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + */ + def buildScan(requiredColumns: Array[String], inputPaths: Array[String]): RDD[Row] = { + // Yeah, to workaround serialization... + val dataSchema = this.dataSchema + val codegenEnabled = this.codegenEnabled + + val requiredOutput = requiredColumns.map { col => + val field = dataSchema(col) + BoundReference(dataSchema.fieldIndex(col), field.dataType, field.nullable) + }.toSeq + + buildScan(inputPaths).mapPartitions { rows => + val buildProjection = if (codegenEnabled) { + GenerateMutableProjection.generate(requiredOutput, dataSchema.toAttributes) + } else { + () => new InterpretedMutableProjection(requiredOutput, dataSchema.toAttributes) + } + + val mutableProjection = buildProjection() + rows.map(mutableProjection) + } + } + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. 
For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * @param requiredColumns Required columns. + * @param filters Candidate filters to be pushed down. The actual filter should be the conjunction + * of all `filters`. The pushed down filters are currently purely an optimization as they + * will all be evaluated again. This means it is safe to use them with methods that produce + * false positives such as filtering partitions based on a bloom filter. + * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + */ + def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputPaths: Array[String]): RDD[Row] = { + buildScan(requiredColumns, inputPaths) + } + + /** + * Client side preparation for data writing can be put here. For example, user defined output + * committer can be configured here. + * + * Note that the only side effect expected here is mutating `job` via its setters. Especially, + * Spark SQL caches [[BaseRelation]] instances for performance, mutating relation internal states + * may cause unexpected behaviors. + */ + def prepareForWrite(job: Job): Unit = () + + /** + * This method is responsible for producing a new [[OutputWriter]] for each newly opened output + * file on the executor side. + */ + def outputWriterClass: Class[_ <: OutputWriter] +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala index 6ed68d179edc9..aad1d248d0a28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala @@ -101,13 +101,13 @@ private[sql] case class PreWriteCheck(catalog: Catalog) extends (LogicalPlan => } } - case i @ logical.InsertIntoTable( - l: LogicalRelation, partition, query, overwrite, ifNotExists) - if !l.isInstanceOf[InsertableRelation] => + case logical.InsertIntoTable(LogicalRelation(_: InsertableRelation), _, _, _, _) => // OK + case logical.InsertIntoTable(LogicalRelation(_: FSBasedRelation), _, _, _, _) => // OK + case logical.InsertIntoTable(l: LogicalRelation, _, _, _, _) => // The relation in l is not an InsertableRelation. failAnalysis(s"$l does not allow insertion.") - case CreateTableUsingAsSelect(tableName, _, _, SaveMode.Overwrite, _, query) => + case CreateTableUsingAsSelect(tableName, _, _, _, SaveMode.Overwrite, _, query) => // When the SaveMode is Overwrite, we need to check if the table is an input table of // the query. If so, we will throw an AnalysisException to let users know it is not allowed. 
if (catalog.tableExists(Seq(tableName))) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala index b7561ce7298cb..bea568ed40049 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala @@ -21,7 +21,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.expressions.Literal -import org.apache.spark.sql.parquet.ParquetRelation2._ +import org.apache.spark.sql.sources.PartitioningUtils._ +import org.apache.spark.sql.sources.{Partition, PartitionSpec} import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.types._ import org.apache.spark.sql.{QueryTest, Row, SQLContext} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 54f2f3cdec298..4e54b2eb8df7a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.sources -import java.io.{IOException, File} +import java.io.{File, IOException} -import org.apache.spark.sql.AnalysisException import org.scalatest.BeforeAndAfterAll +import org.apache.spark.sql.AnalysisException import org.apache.spark.util.Utils class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index bbf48efb24440..d754c8e3a8aa1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -19,25 +19,24 @@ package org.apache.spark.sql.hive import com.google.common.base.Objects import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} - import org.apache.hadoop.fs.Path -import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.metastore.Warehouse +import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.ql.metadata._ import org.apache.hadoop.hive.serde2.Deserializer import org.apache.spark.Logging -import org.apache.spark.sql.{SaveMode, AnalysisException, SQLContext} -import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, Catalog, OverrideCatalog} +import org.apache.spark.sql.catalyst.analysis.{Catalog, MultiInstanceRelation, OverrideCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.hive.client._ -import org.apache.spark.sql.parquet.{ParquetRelation2, Partition => ParquetPartition, PartitionSpec} -import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, ResolvedDataSource} +import org.apache.spark.sql.parquet.ParquetRelation2 +import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, Partition => ParquetPartition, PartitionSpec, 
ResolvedDataSource} import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, SQLContext, SaveMode} import org.apache.spark.util.Utils /* Implicit conversions */ @@ -98,6 +97,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive ResolvedDataSource( hive, userSpecifiedSchema, + Array.empty[String], table.properties("spark.sql.sources.provider"), options) @@ -438,6 +438,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive desc.name, hive.conf.defaultDataSourceName, temporary = false, + Array.empty[String], mode, options = Map.empty[String, String], child diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index be9249a8b1f44..d46a127d47d31 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -221,14 +221,14 @@ private[hive] trait HiveStrategies { object HiveDDLStrategy extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case CreateTableUsing( - tableName, userSpecifiedSchema, provider, false, opts, allowExisting, managedIfNoPath) => + tableName, userSpecifiedSchema, provider, false, opts, allowExisting, managedIfNoPath) => ExecutedCommand( CreateMetastoreDataSource( tableName, userSpecifiedSchema, provider, opts, allowExisting, managedIfNoPath)) :: Nil - case CreateTableUsingAsSelect(tableName, provider, false, mode, opts, query) => + case CreateTableUsingAsSelect(tableName, provider, false, partitionCols, mode, opts, query) => val cmd = - CreateMetastoreDataSourceAsSelect(tableName, provider, mode, opts, query) + CreateMetastoreDataSourceAsSelect(tableName, provider, partitionCols, mode, opts, query) ExecutedCommand(cmd) :: Nil case _ => Nil diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index abab1a223a43a..8e405e080489f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -158,6 +158,7 @@ private[hive] case class CreateMetastoreDataSourceAsSelect( tableName: String, provider: String, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], query: LogicalPlan) extends RunnableCommand { @@ -189,12 +190,12 @@ case class CreateMetastoreDataSourceAsSelect( return Seq.empty[Row] case SaveMode.Append => // Check if the specified data source match the data source of the existing table. - val resolved = - ResolvedDataSource(sqlContext, Some(query.schema), provider, optionsWithPath) + val resolved = ResolvedDataSource( + sqlContext, Some(query.schema.asNullable), partitionColumns, provider, optionsWithPath) val createdRelation = LogicalRelation(resolved.relation) EliminateSubQueries(sqlContext.table(tableName).logicalPlan) match { - case l @ LogicalRelation(i: InsertableRelation) => - if (i != createdRelation.relation) { + case l @ LogicalRelation(_: InsertableRelation | _: FSBasedRelation) => + if (l.relation != createdRelation.relation) { val errorDescription = s"Cannot append to table $tableName because the resolved relation does not " + s"match the existing relation of $tableName. " + @@ -202,14 +203,13 @@ case class CreateMetastoreDataSourceAsSelect( s"table $tableName and using its data source and options." 
val errorMessage = s""" - |$errorDescription - |== Relations == - |${sideBySide( - s"== Expected Relation ==" :: - l.toString :: Nil, - s"== Actual Relation ==" :: - createdRelation.toString :: Nil).mkString("\n")} - """.stripMargin + |$errorDescription + |== Relations == + |${sideBySide( + s"== Expected Relation ==" :: l.toString :: Nil, + s"== Actual Relation ==" :: createdRelation.toString :: Nil + ).mkString("\n")} + """.stripMargin throw new AnalysisException(errorMessage) } existingSchema = Some(l.schema) @@ -234,7 +234,8 @@ case class CreateMetastoreDataSourceAsSelect( } // Create the relation based on the data of df. - val resolved = ResolvedDataSource(sqlContext, provider, mode, optionsWithPath, df) + val resolved = + ResolvedDataSource(sqlContext, provider, partitionColumns, mode, optionsWithPath, df) if (createMetastoreTable) { // We will use the schema of resolved.relation as the schema of the table (instead of diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala index 8398da268174d..cbc381cc81b59 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala @@ -204,7 +204,7 @@ private[spark] class SparkHiveDynamicPartitionWriterContainer( if (string == null || string.isEmpty) { defaultPartName } else { - FileUtils.escapePathName(string) + FileUtils.escapePathName(string, defaultPartName) } s"/$col=$colString" }.mkString diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala new file mode 100644 index 0000000000000..415b1cd168848 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala @@ -0,0 +1,525 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import org.apache.hadoop.fs.Path + +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql._ +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.parquet.ParquetTest +import org.apache.spark.sql.types._ + +// TODO Don't extend ParquetTest +// This test suite extends ParquetTest for some convenient utility methods. These methods should be +// moved to some more general places, maybe QueryTest. 
+class FSBasedRelationSuite extends QueryTest with ParquetTest { + override val sqlContext: SQLContext = TestHive + + import sqlContext._ + import sqlContext.implicits._ + + val dataSchema = + StructType( + Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", StringType, nullable = false))) + + val testDF = (1 to 3).map(i => (i, s"val_$i")).toDF("a", "b") + + val partitionedTestDF1 = (for { + i <- 1 to 3 + p2 <- Seq("foo", "bar") + } yield (i, s"val_$i", 1, p2)).toDF("a", "b", "p1", "p2") + + val partitionedTestDF2 = (for { + i <- 1 to 3 + p2 <- Seq("foo", "bar") + } yield (i, s"val_$i", 2, p2)).toDF("a", "b", "p1", "p2") + + val partitionedTestDF = partitionedTestDF1.unionAll(partitionedTestDF2) + + def checkQueries(df: DataFrame): Unit = { + // Selects everything + checkAnswer( + df, + for (i <- 1 to 3; p1 <- 1 to 2; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", p1, p2)) + + // Simple filtering and partition pruning + checkAnswer( + df.filter('a > 1 && 'p1 === 2), + for (i <- 2 to 3; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", 2, p2)) + + // Simple projection and filtering + checkAnswer( + df.filter('a > 1).select('b, 'a + 1), + for (i <- 2 to 3; _ <- 1 to 2; _ <- Seq("foo", "bar")) yield Row(s"val_$i", i + 1)) + + // Simple projection and partition pruning + checkAnswer( + df.filter('a > 1 && 'p1 < 2).select('b, 'p1), + for (i <- 2 to 3; _ <- Seq("foo", "bar")) yield Row(s"val_$i", 1)) + + // Self-join + df.registerTempTable("t") + withTempTable("t") { + checkAnswer( + sql( + """SELECT l.a, r.b, l.p1, r.p2 + |FROM t l JOIN t r + |ON l.a = r.a AND l.p1 = r.p1 AND l.p2 = r.p2 + """.stripMargin), + for (i <- 1 to 3; p1 <- 1 to 2; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", p1, p2)) + } + } + + test("save()/load() - non-partitioned table - Overwrite") { + withTempPath { file => + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite) + + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)), + testDF.collect()) + } + } + + test("save()/load() - non-partitioned table - Append") { + withTempPath { file => + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite) + + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)).orderBy("a"), + testDF.unionAll(testDF).orderBy("a").collect()) + } + } + + test("save()/load() - non-partitioned table - ErrorIfExists") { + withTempDir { file => + intercept[RuntimeException] { + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists) + } + } + } + + test("save()/load() - non-partitioned table - Ignore") { + withTempDir { file => + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Ignore) + + val path = new Path(file.getCanonicalPath) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + assert(fs.listStatus(path).isEmpty) + } + } + + 
test("save()/load() - partitioned table - simple queries") { + withTempPath { file => + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + checkQueries( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json))) + } + } + + test("save()/load() - partitioned table - simple queries - partition columns in data") { + withTempDir { file => + val basePath = new Path(file.getCanonicalPath) + val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf) + val qualifiedBasePath = fs.makeQualified(basePath) + + for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) { + val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2") + sparkContext + .parallelize(for (i <- 1 to 3) yield s"$i,val_$i,$p1") + .saveAsTextFile(partitionDir.toString) + } + + val dataSchemaWithPartition = + StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) + + checkQueries( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchemaWithPartition.json))) + } + } + + test("save()/load() - partitioned table - Overwrite") { + withTempPath { file => + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)), + partitionedTestDF.collect()) + } + } + + test("save()/load() - partitioned table - Append") { + withTempPath { file => + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)), + partitionedTestDF.unionAll(partitionedTestDF).collect()) + } + } + + test("save()/load() - partitioned table - Append - new partition values") { + withTempPath { file => + partitionedTestDF1.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF2.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)), + partitionedTestDF.collect()) + } + } + + test("save()/load() - partitioned table - ErrorIfExists") { + withTempDir { file => + intercept[RuntimeException] { + partitionedTestDF.save( + 
source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + } + } + } + + test("save()/load() - partitioned table - Ignore") { + withTempDir { file => + partitionedTestDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Ignore) + + val path = new Path(file.getCanonicalPath) + val fs = path.getFileSystem(SparkHadoopUtil.get.conf) + assert(fs.listStatus(path).isEmpty) + } + } + + def withTable(tableName: String)(f: => Unit): Unit = { + try f finally sql(s"DROP TABLE $tableName") + } + + test("saveAsTable()/load() - non-partitioned table - Overwrite") { + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + Map("dataSchema" -> dataSchema.json)) + + withTable("t") { + checkAnswer(table("t"), testDF.collect()) + } + } + + test("saveAsTable()/load() - non-partitioned table - Append") { + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite) + + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append) + + withTable("t") { + checkAnswer(table("t"), testDF.unionAll(testDF).orderBy("a").collect()) + } + } + + test("saveAsTable()/load() - non-partitioned table - ErrorIfExists") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + intercept[AnalysisException] { + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists) + } + } + } + + test("saveAsTable()/load() - non-partitioned table - Ignore") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Ignore) + + assert(table("t").collect().isEmpty) + } + } + + test("saveAsTable()/load() - partitioned table - simple queries") { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + Map("dataSchema" -> dataSchema.json)) + + withTable("t") { + checkQueries(table("t")) + } + } + + test("saveAsTable()/load() - partitioned table - Overwrite") { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + withTable("t") { + checkAnswer(table("t"), partitionedTestDF.collect()) + } + } + + test("saveAsTable()/load() - partitioned table - Append") { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + withTable("t") { + checkAnswer(table("t"), 
partitionedTestDF.unionAll(partitionedTestDF).collect()) + } + } + + test("saveAsTable()/load() - partitioned table - Append - new partition values") { + partitionedTestDF1.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF2.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + withTable("t") { + checkAnswer(table("t"), partitionedTestDF.collect()) + } + } + + test("saveAsTable()/load() - partitioned table - Append - mismatched partition columns") { + partitionedTestDF1.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + // Using only a subset of all partition columns + intercept[Throwable] { + partitionedTestDF2.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1")) + } + + // Using different order of partition columns + intercept[Throwable] { + partitionedTestDF2.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p2", "p1")) + } + } + + test("saveAsTable()/load() - partitioned table - ErrorIfExists") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + intercept[AnalysisException] { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + } + } + } + + test("saveAsTable()/load() - partitioned table - Ignore") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Ignore, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + assert(table("t").collect().isEmpty) + } + } + + test("Hadoop style globbing") { + withTempPath { file => + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + val df = load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> s"${file.getCanonicalPath}/p1=*/p2=???", + "dataSchema" -> dataSchema.json)) + + val expectedPaths = Set( + s"${file.getCanonicalFile}/p1=1/p2=foo", + s"${file.getCanonicalFile}/p1=2/p2=foo", + s"${file.getCanonicalFile}/p1=1/p2=bar", + s"${file.getCanonicalFile}/p1=2/p2=bar" + ).map { p => + val path = new Path(p) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + path.makeQualified(fs.getUri, fs.getWorkingDirectory).toString + } + + println(df.queryExecution) + + val actualPaths = df.queryExecution.analyzed.collectFirst { + case LogicalRelation(relation: FSBasedRelation) => + relation.paths.toSet + }.getOrElse { + fail("Expect an FSBasedRelation, but none could be found") + } + + assert(actualPaths === 
expectedPaths) + checkAnswer(df, partitionedTestDF.collect()) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala new file mode 100644 index 0000000000000..8801aba2f64c3 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import java.text.NumberFormat +import java.util.UUID + +import com.google.common.base.Objects +import org.apache.hadoop.fs.Path +import org.apache.hadoop.io.{NullWritable, Text} +import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat, TextOutputFormat} +import org.apache.hadoop.mapreduce.{RecordWriter, TaskAttemptContext} + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} +import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.{Row, SQLContext} + +/** + * A simple example [[FSBasedRelationProvider]]. 
+ */ +class SimpleTextSource extends FSBasedRelationProvider { + override def createRelation( + sqlContext: SQLContext, + paths: Array[String], + schema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): FSBasedRelation = { + val partitionsSchema = partitionColumns.getOrElse(StructType(Array.empty[StructField])) + new SimpleTextRelation(paths, schema, partitionsSchema, parameters)(sqlContext) + } +} + +class AppendingTextOutputFormat(outputFile: Path) extends TextOutputFormat[NullWritable, Text] { + val numberFormat = NumberFormat.getInstance() + + numberFormat.setMinimumIntegerDigits(5) + numberFormat.setGroupingUsed(false) + + override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { + val split = context.getTaskAttemptID.getTaskID.getId + val name = FileOutputFormat.getOutputName(context) + new Path(outputFile, s"$name-${numberFormat.format(split)}-${UUID.randomUUID()}") + } +} + +class SimpleTextOutputWriter extends OutputWriter { + private var recordWriter: RecordWriter[NullWritable, Text] = _ + private var taskAttemptContext: TaskAttemptContext = _ + + override def init( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): Unit = { + recordWriter = new AppendingTextOutputFormat(new Path(path)).getRecordWriter(context) + taskAttemptContext = context + } + + override def write(row: Row): Unit = { + val serialized = row.toSeq.map(_.toString).mkString(",") + recordWriter.write(null, new Text(serialized)) + } + + override def close(): Unit = recordWriter.close(taskAttemptContext) +} + +/** + * A simple example [[FSBasedRelation]], used for testing purposes. Data are stored as comma + * separated string lines. When scanning data, schema must be explicitly provided via data source + * option `"dataSchema"`. + */ +class SimpleTextRelation( + paths: Array[String], + val maybeDataSchema: Option[StructType], + partitionsSchema: StructType, + parameters: Map[String, String])( + @transient val sqlContext: SQLContext) + extends FSBasedRelation(paths, partitionsSchema) { + + import sqlContext.sparkContext + + override val dataSchema: StructType = + maybeDataSchema.getOrElse(DataType.fromJson(parameters("dataSchema")).asInstanceOf[StructType]) + + override def equals(other: Any): Boolean = other match { + case that: SimpleTextRelation => + this.paths.sameElements(that.paths) && + this.maybeDataSchema == that.maybeDataSchema && + this.dataSchema == that.dataSchema && + this.partitionColumns == that.partitionColumns + + case _ => false + } + + override def hashCode(): Int = + Objects.hashCode(paths, maybeDataSchema, dataSchema) + + override def outputWriterClass: Class[_ <: OutputWriter] = + classOf[SimpleTextOutputWriter] + + override def buildScan(inputPaths: Array[String]): RDD[Row] = { + val fields = dataSchema.map(_.dataType) + + sparkContext.textFile(inputPaths.mkString(",")).map { record => + Row(record.split(",").zip(fields).map { case (value, dataType) => + Cast(Literal(value), dataType).eval() + }: _*) + } + } +} From bfcaf8adcdc20dec203e2e9d5a72b52dd6f226a9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 12 May 2015 10:37:57 -0700 Subject: [PATCH 37/59] [DataFrame][minor] support column in field accessor Minor improvement, now we can use `Column` as extraction expression. 
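Concretely, the diff below changes `Column.apply` to wrap its argument with `lit(...)` instead of `Literal(...)`, so the extraction key or index may itself be a `Column` rather than only a literal value. A minimal, hypothetical sketch of the difference (the DataFrame and its columns `m` and `s` are illustrative, mirroring the `complexData` fixture exercised by the new DataFrameSuite assertion; they are not part of the patch itself):

```scala
// Hypothetical sketch: a DataFrame with a map column `m` and a struct column `s`.
case class S(key: Int, value: String)
case class Record(m: Map[String, Int], s: S)

val sqlContext: org.apache.spark.sql.SQLContext = ???  // assumed available, e.g. in spark-shell
import sqlContext.implicits._

val df = Seq(Record(Map("1" -> 1), S(1, "1"))).toDF()

// Previously supported: extraction with a literal key.
df.filter(df("m")("1") === 1)

// New with this patch: the extraction expression may itself be a Column,
// here a map lookup keyed by the value of another column.
df.filter(df("m")(df("s")("value")) === 1)
```

The last line corresponds to the assertion added to DataFrameSuite in the diff that follows.
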
Author: Wenchen Fan Closes #6080 from cloud-fan/tmp and squashes the following commits: 0fdefb7 [Wenchen Fan] support column in field accessor --- sql/core/src/main/scala/org/apache/spark/sql/Column.scala | 2 +- .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index e6e475bb82f82..4d50821620f5e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -78,7 +78,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * * @group expr_ops */ - def apply(field: Any): Column = UnresolvedExtractValue(expr, Literal(field)) + def apply(extraction: Any): Column = UnresolvedExtractValue(expr, lit(extraction).expr) /** * Unary minus, i.e. negate the expression. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 2ade955864b71..d58438e5d129c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -456,6 +456,7 @@ class DataFrameSuite extends QueryTest { assert(complexData.filter(complexData("a")(0) === 2).count() == 1) assert(complexData.filter(complexData("m")("1") === 1).count() == 1) assert(complexData.filter(complexData("s")("key") === 1).count() == 1) + assert(complexData.filter(complexData("m")(complexData("s")("value")) === 1).count() == 1) } test("SPARK-7324 dropDuplicates") { From 65697bbeafe507dda066e2dc14ca5183f278dfe9 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 12 May 2015 11:17:59 -0700 Subject: [PATCH 38/59] [SPARK-7500] DAG visualization: move cluster labeling to dagre-d3 This fixes the label bleeding issue described in the JIRA and pictured in the screenshots below. I also took the opportunity to move some code to the places that they belong more to. In particular: (1) Drawing cluster labels is now implemented in my branch of dagre-d3 instead of in Spark (2) All graph styling is now moved from Scala to JS Note that these changes are related because our existing mechanism of "tacking on cluster labels" afterwards isn't flexible enough for us to fix issues like this one easily. For the other half of the changes, visit http://github.com/andrewor14/dagre-d3. 
------------------- **Before.** ------------------- **After.** Author: Andrew Or Closes #6076 from andrewor14/dag-viz-bleed and squashes the following commits: 5858d7a [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-bleed c686dc4 [Andrew Or] Fix tooltip placement d908c36 [Andrew Or] Add link to dagre-d3 changes (minor) 4a4fb58 [Andrew Or] Fix bleeding + move all styling to JS --- .../apache/spark/ui/static/dagre-d3.min.js | 14 ++-- .../apache/spark/ui/static/spark-dag-viz.js | 79 ++++++++----------- .../scala/org/apache/spark/ui/UIUtils.scala | 2 +- .../spark/ui/scope/RDDOperationGraph.scala | 40 +++------- 4 files changed, 48 insertions(+), 87 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js index 6beb7db855ca0..acf2d93b718b2 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js +++ b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js @@ -1,4 +1,4 @@ -!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var f;"undefined"!=typeof window?f=window:"undefined"!=typeof global?f=global:"undefined"!=typeof self&&(f=self),f.dagreD3=e()}}(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distpMath.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof 
label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;i0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" 
assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){parentDummyChains(g)});time(" addBorderSegments",function(){addBorderSegments(g)});time(" order",function(){order(g)});time(" insertSelfEdges",function(){insertSelfEdges(g)});time(" adjustCoordinateSystem",function(){coordinateSystem.adjust(g)});time(" position",function(){position(g)});time(" positionSelfEdges",function(){positionSelfEdges(g)});time(" removeBorderNodes",function(){removeBorderNodes(g)});time(" normalize.undo",function(){normalize.undo(g)});time(" fixupEdgeLabelCoords",function(){fixupEdgeLabelCoords(g)});time(" undoCoordinateSystem",function(){coordinateSystem.undo(g)});time(" translateGraph",function(){translateGraph(g)});time(" assignNodeIntersects",function(){assignNodeIntersects(g)});time(" reversePoints",function(){reversePointsForReversedEdges(g)});time(" acyclic.undo",function(){acyclic.undo(g)})}function updateInputGraph(inputGraph,layoutGraph){_.each(inputGraph.nodes(),function(v){var inputLabel=inputGraph.node(v),layoutLabel=layoutGraph.node(v);if(inputLabel){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y;if(layoutGraph.children(v).length){inputLabel.width=layoutLabel.width;inputLabel.height=layoutLabel.height}}});_.each(inputGraph.edges(),function(e){var inputLabel=inputGraph.edge(e),layoutLabel=layoutGraph.edge(e);inputLabel.points=layoutLabel.points;if(_.has(layoutLabel,"x")){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y}});inputGraph.graph().width=layoutGraph.graph().width;inputGraph.graph().height=layoutGraph.graph().height}var graphNumAttrs=["nodesep","edgesep","ranksep","marginx","marginy"],graphDefaults={ranksep:50,edgesep:20,nodesep:50,rankdir:"tb"},graphAttrs=["acyclicer","ranker","rankdir","align"],nodeNumAttrs=["width","height"],nodeDefaults={width:0,height:0},edgeNumAttrs=["minlen","weight","width","height","labeloffset"],edgeDefaults={minlen:1,weight:1,width:0,height:0,labeloffset:10,labelpos:"r"},edgeAttrs=["labelpos"];function buildLayoutGraph(inputGraph){var g=new Graph({multigraph:true,compound:true}),graph=canonicalize(inputGraph.graph());g.setGraph(_.merge({},graphDefaults,selectNumberAttrs(graph,graphNumAttrs),_.pick(graph,graphAttrs))); +module.exports={graphlib:require("./lib/graphlib"),dagre:require("./lib/dagre"),intersect:require("./lib/intersect"),render:require("./lib/render"),util:require("./lib/util"),version:require("./lib/version")}},{"./lib/dagre":8,"./lib/graphlib":9,"./lib/intersect":10,"./lib/render":23,"./lib/util":25,"./lib/version":26}],2:[function(require,module,exports){var util=require("./util");module.exports={"default":normal,normal:normal,vee:vee,undirected:undirected};function normal(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function vee(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 L 4 5 
z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function undirected(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 5 L 10 5").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}},{"./util":25}],3:[function(require,module,exports){var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util");module.exports=createClusters;function createClusters(selection,g){var clusters=g.nodes().filter(function(v){return util.isSubgraph(g,v)}),svgClusters=selection.selectAll("g.cluster").data(clusters,function(v){return v});var makeClusterIdentifier=function(v){return"cluster_"+v.replace(/^cluster/,"")};svgClusters.enter().append("g").attr("id",makeClusterIdentifier).attr("name",function(v){return g.node(v).label}).classed("cluster",true).style("opacity",0).append("rect");var sortedClusters=util.orderByRank(g,svgClusters.data());for(var i=0;i0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distpMath.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;imaxPadding){maxPadding=child.paddingTop}}return maxPadding}function getRank(g,v){var 
maxRank=0;var children=g.children(v);for(var i=0;imaxRank){maxRank=thisRank}}return maxRank}function orderByRank(g,nodes){return nodes.sort(function(x,y){return getRank(g,x)-getRank(g,y)})}function edgeToId(e){return escapeId(e.v)+":"+escapeId(e.w)+":"+escapeId(e.name)}var ID_DELIM=/:/g;function escapeId(str){return str?String(str).replace(ID_DELIM,"\\:"):""}function applyStyle(dom,styleFn){if(styleFn){dom.attr("style",styleFn)}}function applyClass(dom,classFn,otherClasses){if(classFn){dom.attr("class",classFn).attr("class",otherClasses+" "+dom.attr("class"))}}function applyTransition(selection,g){var graph=g.graph();if(_.isPlainObject(graph)){var transition=graph.transition;if(_.isFunction(transition)){return transition(selection)}}return selection}},{"./lodash":20}],26:[function(require,module,exports){module.exports="0.4.4-pre"},{}],27:[function(require,module,exports){module.exports={graphlib:require("./lib/graphlib"),layout:require("./lib/layout"),debug:require("./lib/debug"),util:{time:require("./lib/util").time,notime:require("./lib/util").notime},version:require("./lib/version")}},{"./lib/debug":32,"./lib/graphlib":33,"./lib/layout":35,"./lib/util":55,"./lib/version":56}],28:[function(require,module,exports){"use strict";var _=require("./lodash"),greedyFAS=require("./greedy-fas");module.exports={run:run,undo:undo};function run(g){var fas=g.graph().acyclicer==="greedy"?greedyFAS(g,weightFn(g)):dfsFAS(g);_.each(fas,function(e){var label=g.edge(e);g.removeEdge(e);label.forwardName=e.name;label.reversed=true;g.setEdge(e.w,e.v,label,_.uniqueId("rev"))});function weightFn(g){return function(e){return g.edge(e).weight}}}function dfsFAS(g){var fas=[],stack={},visited={};function dfs(v){if(_.has(visited,v)){return}visited[v]=true;stack[v]=true;_.each(g.outEdges(v),function(e){if(_.has(stack,e.w)){fas.push(e)}else{dfs(e.w)}});delete stack[v]}_.each(g.nodes(),dfs);return fas}function undo(g){_.each(g.edges(),function(e){var label=g.edge(e);if(label.reversed){g.removeEdge(e);var forwardName=label.forwardName;delete label.reversed;delete label.forwardName;g.setEdge(e.w,e.v,label,forwardName)}})}},{"./greedy-fas":34,"./lodash":36}],29:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports=addBorderSegments;function addBorderSegments(g){function dfs(v){var children=g.children(v),node=g.node(v);if(children.length){_.each(children,dfs)}if(_.has(node,"minRank")){node.borderLeft=[];node.borderRight=[];for(var rank=node.minRank,maxRank=node.maxRank+1;rank0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var 
prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){ +parentDummyChains(g)});time(" addBorderSegments",function(){addBorderSegments(g)});time(" order",function(){order(g)});time(" insertSelfEdges",function(){insertSelfEdges(g)});time(" adjustCoordinateSystem",function(){coordinateSystem.adjust(g)});time(" position",function(){position(g)});time(" positionSelfEdges",function(){positionSelfEdges(g)});time(" removeBorderNodes",function(){removeBorderNodes(g)});time(" normalize.undo",function(){normalize.undo(g)});time(" fixupEdgeLabelCoords",function(){fixupEdgeLabelCoords(g)});time(" undoCoordinateSystem",function(){coordinateSystem.undo(g)});time(" translateGraph",function(){translateGraph(g)});time(" assignNodeIntersects",function(){assignNodeIntersects(g)});time(" reversePoints",function(){reversePointsForReversedEdges(g)});time(" acyclic.undo",function(){acyclic.undo(g)})}function updateInputGraph(inputGraph,layoutGraph){_.each(inputGraph.nodes(),function(v){var 
inputLabel=inputGraph.node(v),layoutLabel=layoutGraph.node(v);if(inputLabel){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y;if(layoutGraph.children(v).length){inputLabel.width=layoutLabel.width;inputLabel.height=layoutLabel.height}}});_.each(inputGraph.edges(),function(e){var inputLabel=inputGraph.edge(e),layoutLabel=layoutGraph.edge(e);inputLabel.points=layoutLabel.points;if(_.has(layoutLabel,"x")){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y}});inputGraph.graph().width=layoutGraph.graph().width;inputGraph.graph().height=layoutGraph.graph().height}var graphNumAttrs=["nodesep","edgesep","ranksep","marginx","marginy"],graphDefaults={ranksep:50,edgesep:20,nodesep:50,rankdir:"tb"},graphAttrs=["acyclicer","ranker","rankdir","align"],nodeNumAttrs=["width","height"],nodeDefaults={width:0,height:0},edgeNumAttrs=["minlen","weight","width","height","labeloffset"],edgeDefaults={minlen:1,weight:1,width:0,height:0,labeloffset:10,labelpos:"r"},edgeAttrs=["labelpos"];function buildLayoutGraph(inputGraph){var g=new Graph({multigraph:true,compound:true}),graph=canonicalize(inputGraph.graph());g.setGraph(_.merge({},graphDefaults,selectNumberAttrs(graph,graphNumAttrs),_.pick(graph,graphAttrs)));_.each(inputGraph.nodes(),function(v){var node=canonicalize(inputGraph.node(v));g.setNode(v,_.defaults(selectNumberAttrs(node,nodeNumAttrs),nodeDefaults));g.setParent(v,inputGraph.parent(v))});_.each(inputGraph.edges(),function(e){var edge=canonicalize(inputGraph.edge(e));g.setEdge(e,_.merge({},edgeDefaults,selectNumberAttrs(edge,edgeNumAttrs),_.pick(edge,edgeAttrs)))});return g}function makeSpaceForEdgeLabels(g){var graph=g.graph();graph.ranksep/=2;_.each(g.edges(),function(e){var edge=g.edge(e);edge.minlen*=2;if(edge.labelpos.toLowerCase()!=="c"){if(graph.rankdir==="TB"||graph.rankdir==="BT"){edge.width+=edge.labeloffset}else{edge.height+=edge.labeloffset}}})}function injectEdgeLabelProxies(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.width&&edge.height){var v=g.node(e.v),w=g.node(e.w),label={rank:(w.rank-v.rank)/2+v.rank,e:e};util.addDummyNode(g,"edge-proxy",label,"_ep")}})}function assignRankMinMax(g){var maxRank=0;_.each(g.nodes(),function(v){var node=g.node(v);if(node.borderTop){node.minRank=g.node(node.borderTop).rank;node.maxRank=g.node(node.borderBottom).rank;maxRank=_.max(maxRank,node.maxRank)}});g.graph().maxRank=maxRank}function removeEdgeLabelProxies(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="edge-proxy"){g.edge(node.e).labelRank=node.rank;g.removeNode(v)}})}function translateGraph(g){var minX=Number.POSITIVE_INFINITY,maxX=0,minY=Number.POSITIVE_INFINITY,maxY=0,graphLabel=g.graph(),marginX=graphLabel.marginx||0,marginY=graphLabel.marginy||0;function getExtremes(attrs){var x=attrs.x,y=attrs.y,w=attrs.width,h=attrs.height;minX=Math.min(minX,x-w/2);maxX=Math.max(maxX,x+w/2);minY=Math.min(minY,y-h/2);maxY=Math.max(maxY,y+h/2)}_.each(g.nodes(),function(v){getExtremes(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){getExtremes(edge)}});minX-=marginX;minY-=marginY;_.each(g.nodes(),function(v){var node=g.node(v);node.x-=minX;node.y-=minY});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,function(p){p.x-=minX;p.y-=minY});if(_.has(edge,"x")){edge.x-=minX}if(_.has(edge,"y")){edge.y-=minY}});graphLabel.width=maxX-minX+marginX;graphLabel.height=maxY-minY+marginY}function assignNodeIntersects(g){_.each(g.edges(),function(e){var 
edge=g.edge(e),nodeV=g.node(e.v),nodeW=g.node(e.w),p1,p2;if(!edge.points){edge.points=[];p1=nodeW;p2=nodeV}else{p1=edge.points[0];p2=edge.points[edge.points.length-1]}edge.points.unshift(util.intersectRect(nodeV,p1));edge.points.push(util.intersectRect(nodeW,p2))})}function fixupEdgeLabelCoords(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){if(edge.labelpos==="l"||edge.labelpos==="r"){edge.width-=edge.labeloffset}switch(edge.labelpos){case"l":edge.x-=edge.width/2+edge.labeloffset;break;case"r":edge.x+=edge.width/2+edge.labeloffset;break}}})}function reversePointsForReversedEdges(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.reversed){edge.points.reverse()}})}function removeBorderNodes(g){_.each(g.nodes(),function(v){if(g.children(v).length){var node=g.node(v),t=g.node(node.borderTop),b=g.node(node.borderBottom),l=g.node(_.last(node.borderLeft)),r=g.node(_.last(node.borderRight));node.width=Math.abs(r.x-l.x);node.height=Math.abs(b.y-t.y);node.x=l.x+node.width/2;node.y=t.y+node.height/2}});_.each(g.nodes(),function(v){if(g.node(v).dummy==="border"){g.removeNode(v)}})}function removeSelfEdges(g){_.each(g.edges(),function(e){if(e.v===e.w){var node=g.node(e.v);if(!node.selfEdges){node.selfEdges=[]}node.selfEdges.push({e:e,label:g.edge(e)});g.removeEdge(e)}})}function insertSelfEdges(g){var layers=util.buildLayerMatrix(g);_.each(layers,function(layer){var orderShift=0;_.each(layer,function(v,i){var node=g.node(v);node.order=i+orderShift;_.each(node.selfEdges,function(selfEdge){util.addDummyNode(g,"selfedge",{width:selfEdge.label.width,height:selfEdge.label.height,rank:node.rank,order:i+ ++orderShift,e:selfEdge.e,label:selfEdge.label},"_se")});delete node.selfEdges})})}function positionSelfEdges(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="selfedge"){var selfNode=g.node(node.e.v),x=selfNode.x+selfNode.width/2,y=selfNode.y,dx=node.x-x,dy=selfNode.height/2;g.setEdge(node.e,node.label);g.removeNode(v);node.label.points=[{x:x+2*dx/3,y:y-dy},{x:x+5*dx/6,y:y-dy},{x:x+dx,y:y},{x:x+5*dx/6,y:y+dy},{x:x+2*dx/3,y:y+dy}];node.label.x=node.x;node.label.y=node.y}})}function selectNumberAttrs(obj,attrs){return _.mapValues(_.pick(obj,attrs),Number)}function canonicalize(attrs){var newAttrs={};_.each(attrs,function(v,k){newAttrs[k.toLowerCase()]=v});return newAttrs}},{"./acyclic":28,"./add-border-segments":29,"./coordinate-system":30,"./graphlib":33,"./lodash":36,"./nesting-graph":37,"./normalize":38,"./order":43,"./parent-dummy-chains":48,"./position":50,"./rank":52,"./util":55}],36:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],37:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports={run:run,cleanup:cleanup};function run(g){var root=util.addDummyNode(g,"root",{},"_root"),depths=treeDepths(g),height=_.max(depths)-1,nodeSep=2*height+1;g.graph().nestingRoot=root;_.each(g.edges(),function(e){g.edge(e).minlen*=nodeSep});var weight=sumWeights(g)+1;_.each(g.children(),function(child){dfs(g,root,nodeSep,weight,height,depths,child)});g.graph().nodeRankFactor=nodeSep}function dfs(g,root,nodeSep,weight,height,depths,v){var children=g.children(v);if(!children.length){if(v!==root){g.setEdge(root,v,{weight:0,minlen:nodeSep})}return}var 
top=util.addBorderNode(g,"_bt"),bottom=util.addBorderNode(g,"_bb"),label=g.node(v);g.setParent(top,v);label.borderTop=top;g.setParent(bottom,v);label.borderBottom=bottom;_.each(children,function(child){dfs(g,root,nodeSep,weight,height,depths,child);var childNode=g.node(child),childTop=childNode.borderTop?childNode.borderTop:child,childBottom=childNode.borderBottom?childNode.borderBottom:child,thisWeight=childNode.borderTop?weight:2*weight,minlen=childTop!==childBottom?1:height-depths[v]+1;g.setEdge(top,childTop,{weight:thisWeight,minlen:minlen,nestingEdge:true});g.setEdge(childBottom,bottom,{weight:thisWeight,minlen:minlen,nestingEdge:true})});if(!g.parent(v)){g.setEdge(root,top,{weight:0,minlen:height+depths[v]})}}function treeDepths(g){var depths={};function dfs(v,depth){var children=g.children(v);if(children&&children.length){_.each(children,function(child){dfs(child,depth+1)})}depths[v]=depth}_.each(g.children(),function(v){dfs(v,1)});return depths}function sumWeights(g){return _.reduce(g.edges(),function(acc,e){return acc+g.edge(e).weight},0)}function cleanup(g){var graphLabel=g.graph();g.removeNode(graphLabel.nestingRoot);delete graphLabel.nestingRoot;_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.nestingEdge){g.removeEdge(e)}})}},{"./lodash":36,"./util":55}],38:[function(require,module,exports){"use strict";var _=require("./lodash"),util=require("./util");module.exports={run:run,undo:undo};function run(g){g.graph().dummyChains=[];_.each(g.edges(),function(edge){normalizeEdge(g,edge)})}function normalizeEdge(g,e){var v=e.v,vRank=g.node(v).rank,w=e.w,wRank=g.node(w).rank,name=e.name,edgeLabel=g.edge(e),labelRank=edgeLabel.labelRank;if(wRank===vRank+1)return;g.removeEdge(e);var dummy,attrs,i;for(i=0,++vRank;vRank0){if(index%2){weightSum+=tree[index+1]}index=index-1>>1;tree[index]+=entry.weight}cc+=entry.weight*weightSum}));return cc}},{"../lodash":36}],43:[function(require,module,exports){"use strict";var _=require("../lodash"),initOrder=require("./init-order"),crossCount=require("./cross-count"),sortSubgraph=require("./sort-subgraph"),buildLayerGraph=require("./build-layer-graph"),addSubgraphConstraints=require("./add-subgraph-constraints"),Graph=require("../graphlib").Graph,util=require("../util");module.exports=order;function order(g){var maxRank=util.maxRank(g),downLayerGraphs=buildLayerGraphs(g,_.range(1,maxRank+1),"inEdges"),upLayerGraphs=buildLayerGraphs(g,_.range(maxRank-1,-1,-1),"outEdges");var layering=initOrder(g);assignOrder(g,layering);var bestCC=Number.POSITIVE_INFINITY,best;for(var i=0,lastBest=0;lastBest<4;++i,++lastBest){sweepLayerGraphs(i%2?downLayerGraphs:upLayerGraphs,i%4>=2);layering=util.buildLayerMatrix(g);var cc=crossCount(g,layering);if(cc=vEntry.barycenter){mergeEntries(vEntry,uEntry)}}}function handleOut(vEntry){return function(wEntry){wEntry["in"].push(vEntry);if(--wEntry.indegree===0){sourceSet.push(wEntry)}}}while(sourceSet.length){var entry=sourceSet.pop();entries.push(entry);_.each(entry["in"].reverse(),handleIn(entry));_.each(entry.out,handleOut(entry))}return _.chain(entries).filter(function(entry){return!entry.merged}).map(function(entry){return _.pick(entry,["vs","i","barycenter","weight"])}).value()}function mergeEntries(target,source){var 
sum=0,weight=0;if(target.weight){sum+=target.barycenter*target.weight;weight+=target.weight}if(source.weight){sum+=source.barycenter*source.weight;weight+=source.weight}target.vs=source.vs.concat(target.vs);target.barycenter=sum/weight;target.weight=weight;target.i=Math.min(source.i,target.i);source.merged=true}},{"../lodash":36}],46:[function(require,module,exports){var _=require("../lodash"),barycenter=require("./barycenter"),resolveConflicts=require("./resolve-conflicts"),sort=require("./sort");module.exports=sortSubgraph;function sortSubgraph(g,v,cg,biasRight){var movable=g.children(v),node=g.node(v),bl=node?node.borderLeft:undefined,br=node?node.borderRight:undefined,subgraphs={};if(bl){movable=_.filter(movable,function(w){return w!==bl&&w!==br})}var barycenters=barycenter(g,movable);_.each(barycenters,function(entry){if(g.children(entry.v).length){var subgraphResult=sortSubgraph(g,entry.v,cg,biasRight);subgraphs[entry.v]=subgraphResult;if(_.has(subgraphResult,"barycenter")){mergeBarycenters(entry,subgraphResult)}}});var entries=resolveConflicts(barycenters,cg);expandSubgraphs(entries,subgraphs);var result=sort(entries,biasRight);if(bl){result.vs=_.flatten([bl,result.vs,br],true);if(g.predecessors(bl).length){var blPred=g.node(g.predecessors(bl)[0]),brPred=g.node(g.predecessors(br)[0]);if(!_.has(result,"barycenter")){result.barycenter=0;result.weight=0}result.barycenter=(result.barycenter*result.weight+blPred.order+brPred.order)/(result.weight+2);result.weight+=2}}return result}function expandSubgraphs(entries,subgraphs){_.each(entries,function(entry){entry.vs=_.flatten(entry.vs.map(function(v){if(subgraphs[v]){return subgraphs[v].vs}return v}),true)})}function mergeBarycenters(target,other){if(!_.isUndefined(target.barycenter)){target.barycenter=(target.barycenter*target.weight+other.barycenter*other.weight)/(target.weight+other.weight);target.weight+=other.weight}else{target.barycenter=other.barycenter;target.weight=other.weight}}},{"../lodash":36,"./barycenter":40,"./resolve-conflicts":45,"./sort":47}],47:[function(require,module,exports){var _=require("../lodash"),util=require("../util");module.exports=sort;function sort(entries,biasRight){var parts=util.partition(entries,function(entry){return _.has(entry,"barycenter")});var sortable=parts.lhs,unsortable=_.sortBy(parts.rhs,function(entry){return-entry.i}),vs=[],sum=0,weight=0,vsIndex=0;sortable.sort(compareWithBias(!!biasRight));vsIndex=consumeUnsortable(vs,unsortable,vsIndex);_.each(sortable,function(entry){vsIndex+=entry.vs.length;vs.push(entry.vs);sum+=entry.barycenter*entry.weight;weight+=entry.weight;vsIndex=consumeUnsortable(vs,unsortable,vsIndex)});var result={vs:_.flatten(vs,true)};if(weight){result.barycenter=sum/weight;result.weight=weight}return result}function consumeUnsortable(vs,unsortable,index){var last;while(unsortable.length&&(last=_.last(unsortable)).i<=index){unsortable.pop();vs.push(last.vs);index++}return index}function compareWithBias(bias){return function(entryV,entryW){if(entryV.barycenterentryW.barycenter){return 1}return!bias?entryV.i-entryW.i:entryW.i-entryV.i}}},{"../lodash":36,"../util":55}],48:[function(require,module,exports){var _=require("./lodash");module.exports=parentDummyChains;function parentDummyChains(g){var postorderNums=postorder(g);_.each(g.graph().dummyChains,function(v){var 
node=g.node(v),edgeObj=node.edgeObj,pathData=findPath(g,postorderNums,edgeObj.v,edgeObj.w),path=pathData.path,lca=pathData.lca,pathIdx=0,pathV=path[pathIdx],ascending=true;while(v!==edgeObj.w){node=g.node(v);if(ascending){while((pathV=path[pathIdx])!==lca&&g.node(pathV).maxRanklow||lim>postorderNums[parent].lim));lca=parent;parent=w;while((parent=g.parent(parent))!==lca){wPath.push(parent)}return{path:vPath.concat(wPath.reverse()),lca:lca}}function postorder(g){var result={},lim=0;function dfs(v){var low=lim;_.each(g.children(v),dfs);result[v]={low:low,lim:lim++}}_.each(g.children(),dfs);return result}},{"./lodash":36}],49:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,util=require("../util");module.exports={positionX:positionX,findType1Conflicts:findType1Conflicts,findType2Conflicts:findType2Conflicts,addConflict:addConflict,hasConflict:hasConflict,verticalAlignment:verticalAlignment,horizontalCompaction:horizontalCompaction,alignCoordinates:alignCoordinates,findSmallestWidthAlignment:findSmallestWidthAlignment,balance:balance};function findType1Conflicts(g,layering){var conflicts={};function visitLayer(prevLayer,layer){var k0=0,scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.each(layer,function(v,i){var w=findOtherInnerSegmentNode(g,v),k1=w?g.node(w).order:prevLayerLength;if(w||v===lastNode){_.each(layer.slice(scanPos,i+1),function(scanNode){_.each(g.predecessors(scanNode),function(u){var uLabel=g.node(u),uPos=uLabel.order;if((uPosnextNorthBorder)){addConflict(conflicts,u,v)}})}})}function visitLayer(north,south){var prevNorthPos=-1,nextNorthPos,southPos=0;_.each(south,function(v,southLookahead){if(g.node(v).dummy==="border"){var predecessors=g.predecessors(v);if(predecessors.length){nextNorthPos=g.node(predecessors[0]).order;scan(south,southPos,southLookahead,prevNorthPos,nextNorthPos);southPos=southLookahead;prevNorthPos=nextNorthPos}}scan(south,southPos,south.length,nextNorthPos,north.length)});return south}_.reduce(layering,visitLayer);return conflicts}function findOtherInnerSegmentNode(g,v){if(g.node(v).dummy){return _.find(g.predecessors(v),function(u){return g.node(u).dummy})}}function addConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}var conflictsV=conflicts[v];if(!conflictsV){conflicts[v]=conflictsV={}}conflictsV[w]=true}function hasConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}return _.has(conflicts[v],w)}function verticalAlignment(g,layering,conflicts,neighborFn){var root={},align={},pos={};_.each(layering,function(layer){_.each(layer,function(v,order){root[v]=v;align[v]=v;pos[v]=order})});_.each(layering,function(layer){var prevIdx=-1;_.each(layer,function(v){var ws=neighborFn(v);if(ws.length){ws=_.sortBy(ws,function(w){return pos[w]});var mp=(ws.length-1)/2;for(var i=Math.floor(mp),il=Math.ceil(mp);i<=il;++i){var w=ws[i];if(align[v]===v&&prevIdx0){if(index%2){weightSum+=tree[index+1]}index=index-1>>1;tree[index]+=entry.weight}cc+=entry.weight*weightSum}));return cc}},{"../lodash":36}],43:[function(require,module,exports){"use strict";var _=require("../lodash"),initOrder=require("./init-order"),crossCount=require("./cross-count"),sortSubgraph=require("./sort-subgraph"),buildLayerGraph=require("./build-layer-graph"),addSubgraphConstraints=require("./add-subgraph-constraints"),Graph=require("../graphlib").Graph,util=require("../util");module.exports=order;function order(g){var 
maxRank=util.maxRank(g),downLayerGraphs=buildLayerGraphs(g,_.range(1,maxRank+1),"inEdges"),upLayerGraphs=buildLayerGraphs(g,_.range(maxRank-1,-1,-1),"outEdges");var layering=initOrder(g);assignOrder(g,layering);var bestCC=Number.POSITIVE_INFINITY,best;for(var i=0,lastBest=0;lastBest<4;++i,++lastBest){sweepLayerGraphs(i%2?downLayerGraphs:upLayerGraphs,i%4>=2);layering=util.buildLayerMatrix(g);var cc=crossCount(g,layering);if(cc=vEntry.barycenter){mergeEntries(vEntry,uEntry)}}}function handleOut(vEntry){return function(wEntry){wEntry["in"].push(vEntry);if(--wEntry.indegree===0){sourceSet.push(wEntry)}}}while(sourceSet.length){var entry=sourceSet.pop();entries.push(entry);_.each(entry["in"].reverse(),handleIn(entry));_.each(entry.out,handleOut(entry))}return _.chain(entries).filter(function(entry){return!entry.merged}).map(function(entry){return _.pick(entry,["vs","i","barycenter","weight"])}).value()}function mergeEntries(target,source){var sum=0,weight=0;if(target.weight){sum+=target.barycenter*target.weight;weight+=target.weight}if(source.weight){sum+=source.barycenter*source.weight;weight+=source.weight}target.vs=source.vs.concat(target.vs);target.barycenter=sum/weight;target.weight=weight;target.i=Math.min(source.i,target.i);source.merged=true}},{"../lodash":36}],46:[function(require,module,exports){var _=require("../lodash"),barycenter=require("./barycenter"),resolveConflicts=require("./resolve-conflicts"),sort=require("./sort");module.exports=sortSubgraph;function sortSubgraph(g,v,cg,biasRight){var movable=g.children(v),node=g.node(v),bl=node?node.borderLeft:undefined,br=node?node.borderRight:undefined,subgraphs={};if(bl){movable=_.filter(movable,function(w){return w!==bl&&w!==br})}var barycenters=barycenter(g,movable);_.each(barycenters,function(entry){if(g.children(entry.v).length){var subgraphResult=sortSubgraph(g,entry.v,cg,biasRight);subgraphs[entry.v]=subgraphResult;if(_.has(subgraphResult,"barycenter")){mergeBarycenters(entry,subgraphResult)}}});var entries=resolveConflicts(barycenters,cg);expandSubgraphs(entries,subgraphs);var result=sort(entries,biasRight);if(bl){result.vs=_.flatten([bl,result.vs,br],true);if(g.predecessors(bl).length){var blPred=g.node(g.predecessors(bl)[0]),brPred=g.node(g.predecessors(br)[0]);if(!_.has(result,"barycenter")){result.barycenter=0;result.weight=0}result.barycenter=(result.barycenter*result.weight+blPred.order+brPred.order)/(result.weight+2);result.weight+=2}}return result}function expandSubgraphs(entries,subgraphs){_.each(entries,function(entry){entry.vs=_.flatten(entry.vs.map(function(v){if(subgraphs[v]){return subgraphs[v].vs}return v}),true)})}function mergeBarycenters(target,other){if(!_.isUndefined(target.barycenter)){target.barycenter=(target.barycenter*target.weight+other.barycenter*other.weight)/(target.weight+other.weight);target.weight+=other.weight}else{target.barycenter=other.barycenter;target.weight=other.weight}}},{"../lodash":36,"./barycenter":40,"./resolve-conflicts":45,"./sort":47}],47:[function(require,module,exports){var _=require("../lodash"),util=require("../util");module.exports=sort;function sort(entries,biasRight){var parts=util.partition(entries,function(entry){return _.has(entry,"barycenter")});var 
sortable=parts.lhs,unsortable=_.sortBy(parts.rhs,function(entry){return-entry.i}),vs=[],sum=0,weight=0,vsIndex=0;sortable.sort(compareWithBias(!!biasRight));vsIndex=consumeUnsortable(vs,unsortable,vsIndex);_.each(sortable,function(entry){vsIndex+=entry.vs.length;vs.push(entry.vs);sum+=entry.barycenter*entry.weight;weight+=entry.weight;vsIndex=consumeUnsortable(vs,unsortable,vsIndex)});var result={vs:_.flatten(vs,true)};if(weight){result.barycenter=sum/weight;result.weight=weight}return result}function consumeUnsortable(vs,unsortable,index){var last;while(unsortable.length&&(last=_.last(unsortable)).i<=index){unsortable.pop();vs.push(last.vs);index++}return index}function compareWithBias(bias){return function(entryV,entryW){if(entryV.barycenterentryW.barycenter){return 1}return!bias?entryV.i-entryW.i:entryW.i-entryV.i}}},{"../lodash":36,"../util":55}],48:[function(require,module,exports){var _=require("./lodash");module.exports=parentDummyChains;function parentDummyChains(g){var postorderNums=postorder(g);_.each(g.graph().dummyChains,function(v){var node=g.node(v),edgeObj=node.edgeObj,pathData=findPath(g,postorderNums,edgeObj.v,edgeObj.w),path=pathData.path,lca=pathData.lca,pathIdx=0,pathV=path[pathIdx],ascending=true;while(v!==edgeObj.w){node=g.node(v);if(ascending){while((pathV=path[pathIdx])!==lca&&g.node(pathV).maxRanklow||lim>postorderNums[parent].lim));lca=parent;parent=w;while((parent=g.parent(parent))!==lca){wPath.push(parent)}return{path:vPath.concat(wPath.reverse()),lca:lca}}function postorder(g){var result={},lim=0;function dfs(v){var low=lim;_.each(g.children(v),dfs);result[v]={low:low,lim:lim++}}_.each(g.children(),dfs);return result}},{"./lodash":36}],49:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,util=require("../util");module.exports={positionX:positionX,findType1Conflicts:findType1Conflicts,findType2Conflicts:findType2Conflicts,addConflict:addConflict,hasConflict:hasConflict,verticalAlignment:verticalAlignment,horizontalCompaction:horizontalCompaction,alignCoordinates:alignCoordinates,findSmallestWidthAlignment:findSmallestWidthAlignment,balance:balance};function findType1Conflicts(g,layering){var conflicts={};function visitLayer(prevLayer,layer){var k0=0,scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.each(layer,function(v,i){var w=findOtherInnerSegmentNode(g,v),k1=w?g.node(w).order:prevLayerLength;if(w||v===lastNode){_.each(layer.slice(scanPos,i+1),function(scanNode){_.each(g.predecessors(scanNode),function(u){var uLabel=g.node(u),uPos=uLabel.order;if((uPosnextNorthBorder)){addConflict(conflicts,u,v)}})}})}function visitLayer(north,south){var prevNorthPos=-1,nextNorthPos,southPos=0;_.each(south,function(v,southLookahead){if(g.node(v).dummy==="border"){var predecessors=g.predecessors(v);if(predecessors.length){nextNorthPos=g.node(predecessors[0]).order;scan(south,southPos,southLookahead,prevNorthPos,nextNorthPos);southPos=southLookahead;prevNorthPos=nextNorthPos}}scan(south,southPos,south.length,nextNorthPos,north.length)});return south}_.reduce(layering,visitLayer);return conflicts}function findOtherInnerSegmentNode(g,v){if(g.node(v).dummy){return _.find(g.predecessors(v),function(u){return g.node(u).dummy})}}function addConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}var conflictsV=conflicts[v];if(!conflictsV){conflicts[v]=conflictsV={}}conflictsV[w]=true}function hasConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}return _.has(conflicts[v],w)}function 
verticalAlignment(g,layering,conflicts,neighborFn){var root={},align={},pos={};_.each(layering,function(layer){_.each(layer,function(v,order){root[v]=v;align[v]=v;pos[v]=order})});_.each(layering,function(layer){var prevIdx=-1;_.each(layer,function(v){var ws=neighborFn(v);if(ws.length){ws=_.sortBy(ws,function(w){return pos[w]});var mp=(ws.length-1)/2;for(var i=Math.floor(mp),il=Math.ceil(mp);i<=il;++i){var w=ws[i];if(align[v]===v&&prevIdxwLabel.lim){tailLabel=wLabel;flip=true}var candidates=_.filter(g.edges(),function(edge){return flip===isDescendant(t,t.node(edge.v),tailLabel)&&flip!==isDescendant(t,t.node(edge.w),tailLabel)});return _.min(candidates,function(edge){return slack(g,edge)})}function exchangeEdges(t,g,e,f){var v=e.v,w=e.w;t.removeEdge(v,w);t.setEdge(f.v,f.w,{});initLowLimValues(t);initCutValues(t,g);updateRanks(t,g)}function updateRanks(t,g){var root=_.find(t.nodes(),function(v){return!g.node(v).parent}),vs=preorder(t,root);vs=vs.slice(1);_.each(vs,function(v){var parent=t.node(v).parent,edge=g.edge(v,parent),flipped=false;if(!edge){edge=g.edge(parent,v);flipped=true}g.node(v).rank=g.node(parent).rank+(flipped?edge.minlen:-edge.minlen)})}function isTreeEdge(tree,u,v){return tree.hasEdge(u,v)}function isDescendant(tree,vLabel,rootLabel){return rootLabel.low<=vLabel.lim&&vLabel.lim<=rootLabel.lim}},{"../graphlib":33,"../lodash":36,"../util":55,"./feasible-tree":51,"./util":54}],54:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports={longestPath:longestPath,slack:slack};function longestPath(g){var visited={};function dfs(v){var label=g.node(v);if(_.has(visited,v)){return label.rank}visited[v]=true;var rank=_.min(_.map(g.outEdges(v),function(e){return dfs(e.w)-g.edge(e).minlen}));if(rank===Number.POSITIVE_INFINITY){rank=0}return label.rank=rank}_.each(g.sources(),dfs)}function slack(g,e){return g.node(e.w).rank-g.node(e.v).rank-g.edge(e).minlen}},{"../lodash":36}],55:[function(require,module,exports){"use strict";var _=require("./lodash"),Graph=require("./graphlib").Graph;module.exports={addDummyNode:addDummyNode,simplify:simplify,asNonCompoundGraph:asNonCompoundGraph,successorWeights:successorWeights,predecessorWeights:predecessorWeights,intersectRect:intersectRect,buildLayerMatrix:buildLayerMatrix,normalizeRanks:normalizeRanks,removeEmptyRanks:removeEmptyRanks,addBorderNode:addBorderNode,maxRank:maxRank,partition:partition,time:time,notime:notime};function addDummyNode(g,type,attrs,name){var v; +if(!g.hasEdge(v,w)){v=edge.w;w=edge.v}var vLabel=t.node(v),wLabel=t.node(w),tailLabel=vLabel,flip=false;if(vLabel.lim>wLabel.lim){tailLabel=wLabel;flip=true}var candidates=_.filter(g.edges(),function(edge){return flip===isDescendant(t,t.node(edge.v),tailLabel)&&flip!==isDescendant(t,t.node(edge.w),tailLabel)});return _.min(candidates,function(edge){return slack(g,edge)})}function exchangeEdges(t,g,e,f){var v=e.v,w=e.w;t.removeEdge(v,w);t.setEdge(f.v,f.w,{});initLowLimValues(t);initCutValues(t,g);updateRanks(t,g)}function updateRanks(t,g){var root=_.find(t.nodes(),function(v){return!g.node(v).parent}),vs=preorder(t,root);vs=vs.slice(1);_.each(vs,function(v){var parent=t.node(v).parent,edge=g.edge(v,parent),flipped=false;if(!edge){edge=g.edge(parent,v);flipped=true}g.node(v).rank=g.node(parent).rank+(flipped?edge.minlen:-edge.minlen)})}function isTreeEdge(tree,u,v){return tree.hasEdge(u,v)}function isDescendant(tree,vLabel,rootLabel){return 
rootLabel.low<=vLabel.lim&&vLabel.lim<=rootLabel.lim}},{"../graphlib":33,"../lodash":36,"../util":55,"./feasible-tree":51,"./util":54}],54:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports={longestPath:longestPath,slack:slack};function longestPath(g){var visited={};function dfs(v){var label=g.node(v);if(_.has(visited,v)){return label.rank}visited[v]=true;var rank=_.min(_.map(g.outEdges(v),function(e){return dfs(e.w)-g.edge(e).minlen}));if(rank===Number.POSITIVE_INFINITY){rank=0}return label.rank=rank}_.each(g.sources(),dfs)}function slack(g,e){return g.node(e.w).rank-g.node(e.v).rank-g.edge(e).minlen}},{"../lodash":36}],55:[function(require,module,exports){"use strict";var _=require("./lodash"),Graph=require("./graphlib").Graph;module.exports={addDummyNode:addDummyNode,simplify:simplify,asNonCompoundGraph:asNonCompoundGraph,successorWeights:successorWeights,predecessorWeights:predecessorWeights,intersectRect:intersectRect,buildLayerMatrix:buildLayerMatrix,normalizeRanks:normalizeRanks,removeEmptyRanks:removeEmptyRanks,addBorderNode:addBorderNode,maxRank:maxRank,partition:partition,time:time,notime:notime};function addDummyNode(g,type,attrs,name){var v;do{v=_.uniqueId(name)}while(g.hasNode(v));attrs.dummy=type;g.setNode(v,attrs);return v}function simplify(g){var simplified=(new Graph).setGraph(g.graph());_.each(g.nodes(),function(v){simplified.setNode(v,g.node(v))});_.each(g.edges(),function(e){var simpleLabel=simplified.edge(e.v,e.w)||{weight:0,minlen:1},label=g.edge(e);simplified.setEdge(e.v,e.w,{weight:simpleLabel.weight+label.weight,minlen:Math.max(simpleLabel.minlen,label.minlen)})});return simplified}function asNonCompoundGraph(g){var simplified=new Graph({multigraph:g.isMultigraph()}).setGraph(g.graph());_.each(g.nodes(),function(v){if(!g.children(v).length){simplified.setNode(v,g.node(v))}});_.each(g.edges(),function(e){simplified.setEdge(e,g.edge(e))});return simplified}function successorWeights(g){var weightMap=_.map(g.nodes(),function(v){var sucs={};_.each(g.outEdges(v),function(e){sucs[e.w]=(sucs[e.w]||0)+g.edge(e).weight});return sucs});return _.zipObject(g.nodes(),weightMap)}function predecessorWeights(g){var weightMap=_.map(g.nodes(),function(v){var preds={};_.each(g.inEdges(v),function(e){preds[e.v]=(preds[e.v]||0)+g.edge(e).weight});return preds});return _.zipObject(g.nodes(),weightMap)}function intersectRect(rect,point){var x=rect.x;var y=rect.y;var dx=point.x-x;var dy=point.y-y;var w=rect.width/2;var h=rect.height/2;if(!dx&&!dy){throw new Error("Not possible to find intersection inside of the rectangle")}var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=w*dy/dx}return{x:x+sx,y:y+sy}}function buildLayerMatrix(g){var layering=_.map(_.range(maxRank(g)+1),function(){return[]});_.each(g.nodes(),function(v){var node=g.node(v),rank=node.rank;if(!_.isUndefined(rank)){layering[rank][node.order]=v}});return layering}function normalizeRanks(g){var min=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"rank")){node.rank-=min}})}function removeEmptyRanks(g){var offset=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));var layers=[];_.each(g.nodes(),function(v){var rank=g.node(v).rank-offset;if(!_.has(layers,rank)){layers[rank]=[]}layers[rank].push(v)});var delta=0,nodeRankFactor=g.graph().nodeRankFactor;_.each(layers,function(vs,i){if(_.isUndefined(vs)&&i%nodeRankFactor!==0){--delta}else 
if(delta){_.each(vs,function(v){g.node(v).rank+=delta})}})}function addBorderNode(g,prefix,rank,order){var node={width:0,height:0};if(arguments.length>=4){node.rank=rank;node.order=order}return addDummyNode(g,"border",node,prefix)}function maxRank(g){return _.max(_.map(g.nodes(),function(v){var rank=g.node(v).rank;if(!_.isUndefined(rank)){return rank}}))}function partition(collection,fn){var result={lhs:[],rhs:[]};_.each(collection,function(value){if(fn(value)){result.lhs.push(value)}else{result.rhs.push(value)}});return result}function time(name,fn){var start=_.now();try{return fn()}finally{console.log(name+" time: "+(_.now()-start)+"ms")}}function notime(name,fn){return fn()}},{"./graphlib":33,"./lodash":36}],56:[function(require,module,exports){module.exports="0.7.1"},{}],57:[function(require,module,exports){var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":73,"./lib/alg":64,"./lib/json":74}],58:[function(require,module,exports){var _=require("../lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{"../lodash":75}],59:[function(require,module,exports){var _=require("../lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{"../lodash":75}],60:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("../lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"../lodash":75,"./dijkstra":61}],61:[function(require,module,exports){var _=require("../lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. 
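The ranking utilities just above reduce to one invariant: for every edge, rank(w) - rank(v) >= minlen, and slack(e) = rank(w) - rank(v) - minlen measures the surplus. longestPath gives each node the smallest rank compatible with its out-edges and anchors sinks at 0; normalizeRanks then shifts everything so the minimum rank becomes 0. A self-contained sketch under those assumptions, using plain objects instead of the graphlib Graph:

    // Longest-path ranking on a DAG described by plain edge objects.
    function longestPathRanks(nodeIds, edges) {
      var ranks = {};
      function dfs(v) {
        if (v in ranks) return ranks[v];
        var out = edges.filter(function (e) { return e.v === v; });
        var rank = Math.min.apply(null, out.map(function (e) { return dfs(e.w) - e.minlen; }));
        if (!isFinite(rank)) rank = 0;              // sinks anchor the ranking
        return (ranks[v] = rank);
      }
      nodeIds.forEach(dfs);
      return ranks;
    }
    // longestPathRanks(["a","b","c"], [{v:"a",w:"b",minlen:1}, {v:"b",w:"c",minlen:1}])
    //   => { c: 0, b: -1, a: -2 }   (normalizeRanks would shift these to 0, 1, 2)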
"+"Bad edge: "+edge+" Weight: "+weight)}if(distance0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":71,"../lodash":75}],62:[function(require,module,exports){var _=require("../lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"../lodash":75,"./tarjan":69}],63:[function(require,module,exports){var _=require("../lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":71,"../graph":72,"../lodash":75}],69:[function(require,module,exports){var _=require("../lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{"../lodash":75}],70:[function(require,module,exports){var _=require("../lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{"../lodash":75}],71:[function(require,module,exports){var _=require("../lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var 
keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. "+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l>1;if(arr[parent].priority1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return 
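The Graph methods above maintain parallel indexes (_nodes, _in, _out, _preds, _sucs and, for compound graphs, _parent/_children) so that neighbour queries are simple map lookups. A short usage sketch of the compound bookkeeping through the public API:

    var Graph = require("graphlib").Graph;
    var g = new Graph({ directed: true, compound: true });
    g.setNode("cluster");
    g.setNode("x", { label: "x" });
    g.setParent("x", "cluster");      // updates _parent and _children as shown above
    g.setEdge("x", "y");              // "y" is created implicitly via setNode
    // g.children("cluster") => ["x"]
    // g.successors("x")     => ["y"]
    // g.neighbors("y")      => ["x"]   (union of predecessors and successors)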
_.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(){var v,w,name,value,valueSpecified=false;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arguments[0];w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(!_.isUndefined(name)){name=""+name}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return _.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{"./lodash":75}],73:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":72,"./version":76}],74:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var 
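setEdge above keys every edge by (v, w, name) via edgeArgsToId, which is what makes named parallel edges possible on multigraphs and why unnamed edges on a plain graph overwrite each other. A brief sketch of both behaviours:

    var Graph = require("graphlib").Graph;
    var mg = new Graph({ multigraph: true });
    mg.setEdge("a", "b", { weight: 1 }, "first");
    mg.setEdge("a", "b", { weight: 9 }, "second");
    // mg.edgeCount() => 2
    // mg.edge("a", "b", "first").weight => 1
    // mg.hasEdge({ v: "a", w: "b", name: "second" }) => true
    var pg = new Graph();                        // not a multigraph
    pg.setEdge("a", "b", { weight: 1 });
    pg.setEdge("a", "b", { weight: 9 });         // same id, so the label is replaced
    // pg.edgeCount() => 1, pg.edge("a", "b").weight => 9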
json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":72,"./lodash":75}],75:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],76:[function(require,module,exports){module.exports="1.0.1"},{}],77:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f \ufeff"+"\n\r\u2028\u2029"+" ᠎              ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index-1?0:-1:cache?0:-1}function 
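The json module above serialises a graph to { options, nodes, edges, value } and read() rebuilds it, including parent links for compound graphs. A round-trip sketch, again assuming the standalone graphlib package:

    var graphlib = require("graphlib");
    var g = new graphlib.Graph().setGraph({ name: "demo" });
    g.setNode("n1", { label: "start" });
    g.setEdge("n1", "n2", { minlen: 2 });
    var obj = graphlib.json.write(g);   // plain object, safe to JSON.stringify
    var g2 = graphlib.json.read(obj);
    // g2.graph().name            => "demo"
    // g2.edge("n1", "n2").minlen => 2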
cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++indexother||typeof value=="undefined"){return 1}if(value/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return isObject(prototype)?nativeCreate(prototype):{}; -do{v=_.uniqueId(name)}while(g.hasNode(v));attrs.dummy=type;g.setNode(v,attrs);return v}function simplify(g){var simplified=(new Graph).setGraph(g.graph());_.each(g.nodes(),function(v){simplified.setNode(v,g.node(v))});_.each(g.edges(),function(e){var simpleLabel=simplified.edge(e.v,e.w)||{weight:0,minlen:1},label=g.edge(e);simplified.setEdge(e.v,e.w,{weight:simpleLabel.weight+label.weight,minlen:Math.max(simpleLabel.minlen,label.minlen)})});return simplified}function asNonCompoundGraph(g){var simplified=new Graph({multigraph:g.isMultigraph()}).setGraph(g.graph());_.each(g.nodes(),function(v){if(!g.children(v).length){simplified.setNode(v,g.node(v))}});_.each(g.edges(),function(e){simplified.setEdge(e,g.edge(e))});return simplified}function successorWeights(g){var weightMap=_.map(g.nodes(),function(v){var sucs={};_.each(g.outEdges(v),function(e){sucs[e.w]=(sucs[e.w]||0)+g.edge(e).weight});return sucs});return _.zipObject(g.nodes(),weightMap)}function predecessorWeights(g){var weightMap=_.map(g.nodes(),function(v){var preds={};_.each(g.inEdges(v),function(e){preds[e.v]=(preds[e.v]||0)+g.edge(e).weight});return preds});return 
_.zipObject(g.nodes(),weightMap)}function intersectRect(rect,point){var x=rect.x;var y=rect.y;var dx=point.x-x;var dy=point.y-y;var w=rect.width/2;var h=rect.height/2;if(!dx&&!dy){throw new Error("Not possible to find intersection inside of the rectangle")}var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=w*dy/dx}return{x:x+sx,y:y+sy}}function buildLayerMatrix(g){var layering=_.map(_.range(maxRank(g)+1),function(){return[]});_.each(g.nodes(),function(v){var node=g.node(v),rank=node.rank;if(!_.isUndefined(rank)){layering[rank][node.order]=v}});return layering}function normalizeRanks(g){var min=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"rank")){node.rank-=min}})}function removeEmptyRanks(g){var offset=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));var layers=[];_.each(g.nodes(),function(v){var rank=g.node(v).rank-offset;if(!_.has(layers,rank)){layers[rank]=[]}layers[rank].push(v)});var delta=0,nodeRankFactor=g.graph().nodeRankFactor;_.each(layers,function(vs,i){if(_.isUndefined(vs)&&i%nodeRankFactor!==0){--delta}else if(delta){_.each(vs,function(v){g.node(v).rank+=delta})}})}function addBorderNode(g,prefix,rank,order){var node={width:0,height:0};if(arguments.length>=4){node.rank=rank;node.order=order}return addDummyNode(g,"border",node,prefix)}function maxRank(g){return _.max(_.map(g.nodes(),function(v){var rank=g.node(v).rank;if(!_.isUndefined(rank)){return rank}}))}function partition(collection,fn){var result={lhs:[],rhs:[]};_.each(collection,function(value){if(fn(value)){result.lhs.push(value)}else{result.rhs.push(value)}});return result}function time(name,fn){var start=_.now();try{return fn()}finally{console.log(name+" time: "+(_.now()-start)+"ms")}}function notime(name,fn){return fn()}},{"./graphlib":33,"./lodash":36}],56:[function(require,module,exports){module.exports="0.7.1"},{}],57:[function(require,module,exports){var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":73,"./lib/alg":64,"./lib/json":74}],58:[function(require,module,exports){var _=require("../lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{"../lodash":75}],59:[function(require,module,exports){var _=require("../lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{"../lodash":75}],60:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("../lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"../lodash":75,"./dijkstra":61}],61:[function(require,module,exports){var _=require("../lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function 
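As a worked example of intersectRect above: for a 100x60 node centred at (0, 0) and an external point at (200, 30), dx = 200, dy = 30, w = 50, h = 30; since |dy|*w = 1500 is not greater than |dx|*h = 6000, the ray leaves through the vertical side, giving sx = 50 and sy = 50 * 30 / 200 = 7.5, so intersectRect({x:0, y:0, width:100, height:60}, {x:200, y:30}) returns {x: 50, y: 7.5}. This is how edge endpoints get clipped to node borders when the layout is rendered.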
dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. "+"Bad edge: "+edge+" Weight: "+weight)}if(distance0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":71,"../lodash":75}],62:[function(require,module,exports){var _=require("../lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"../lodash":75,"./tarjan":69}],63:[function(require,module,exports){var _=require("../lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":71,"../graph":72,"../lodash":75}],69:[function(require,module,exports){var _=require("../lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{"../lodash":75}],70:[function(require,module,exports){var _=require("../lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{"../lodash":75}],71:[function(require,module,exports){var _=require("../lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return 
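tarjan, findCycles and topsort above are the standard cycle machinery: Tarjan's algorithm groups nodes into strongly connected components, findCycles keeps the components with more than one node, and topsort refuses to order a cyclic graph. A usage sketch on the public graphlib API:

    var graphlib = require("graphlib");
    var g = new graphlib.Graph();
    g.setPath(["a", "b", "c"]);                  // a -> b -> c
    g.setEdge("c", "a");                         // close the cycle
    // graphlib.alg.tarjan(g)     => [["c", "b", "a"]]     one strongly connected component
    // graphlib.alg.findCycles(g) => [["c", "b", "a"]]     components of size > 1
    // graphlib.alg.topsort(g)    throws a CycleException
    g.removeEdge("c", "a");
    // graphlib.alg.topsort(g)    => ["a", "b", "c"]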
x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. "+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l>1;if(arr[parent].priority1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var 
preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return _.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(){var v,w,name,value,valueSpecified=false;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arguments[0];w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(!_.isUndefined(name)){name=""+name}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return _.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return 
edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{"./lodash":75}],73:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":72,"./version":76}],74:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":72,"./lodash":75}],75:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],76:[function(require,module,exports){module.exports="1.0.1"},{}],77:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f \ufeff"+"\n\r\u2028\u2029"+" ᠎              ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var 
moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index-1?0:-1:cache?0:-1}function cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++indexother||typeof value=="undefined"){return 1}if(value/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return isObject(prototype)?nativeCreate(prototype):{}}if(!nativeCreate){baseCreate=function(){function Object(){}return function(prototype){if(isObject(prototype)){Object.prototype=prototype;var result=new Object;Object.prototype=null}return result||context.Object()}}()}function baseCreateCallback(func,thisArg,argCount){if(typeof func!="function"){return identity}if(typeof thisArg=="undefined"||!("prototype"in func)){return func}var bindData=func.__bindData__;if(typeof bindData=="undefined"){if(support.funcNames){bindData=!func.name}bindData=bindData||!support.funcDecomp;if(!bindData){var source=fnToString.call(func);if(!support.funcNames){bindData=!reFuncName.test(source)}if(!bindData){bindData=reThis.test(source);setBindData(func,bindData)}}}if(bindData===false||bindData!==true&&bindData[1]&1){return func}switch(argCount){case 1:return 
function(value){return func.call(thisArg,value)};case 2:return function(a,b){return func.call(thisArg,a,b)};case 3:return function(value,index,collection){return func.call(thisArg,value,index,collection)};case 4:return function(accumulator,value,index,collection){return func.call(thisArg,accumulator,value,index,collection)}}return bind(func,thisArg)}function baseCreateWrapper(bindData){var func=bindData[0],bitmask=bindData[1],partialArgs=bindData[2],partialRightArgs=bindData[3],thisArg=bindData[4],arity=bindData[5];var isBind=bitmask&1,isBindKey=bitmask&2,isCurry=bitmask&4,isCurryBound=bitmask&8,key=func;function bound(){var thisBinding=isBind?thisArg:this;if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(partialRightArgs||isCurry){args||(args=slice(arguments));if(partialRightArgs){push.apply(args,partialRightArgs)}if(isCurry&&args.length=largeArraySize&&indexOf===baseIndexOf,result=[]; - -if(isLarge){var cache=createCache(values);if(cache){indexOf=cacheIndexOf;values=cache}else{isLarge=false}}while(++index-1}})}}stackA.pop();stackB.pop();if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseMerge(object,source,callback,stackA,stackB){(isArray(source)?forEach:forOwn)(source,function(source,key){var found,isArr,result=source,value=object[key];if(source&&((isArr=isArray(source))||isPlainObject(source))){var stackLength=stackA.length;while(stackLength--){if(found=stackA[stackLength]==source){value=stackB[stackLength];break}}if(!found){var isShallow;if(callback){result=callback(value,source);if(isShallow=typeof result!="undefined"){value=result}}if(!isShallow){value=isArr?isArray(value)?value:[]:isPlainObject(value)?value:{}}stackA.push(source);stackB.push(value);if(!isShallow){baseMerge(value,source,callback,stackA,stackB)}}}else{if(callback){result=callback(value,source);if(typeof result=="undefined"){result=source}}if(typeof result!="undefined"){value=result}}object[key]=value})}function baseRandom(min,max){return min+floor(nativeRandom()*(max-min+1))}function baseUniq(array,isSorted,callback){var index=-1,indexOf=getIndexOf(),length=array?array.length:0,result=[];var isLarge=!isSorted&&length>=largeArraySize&&indexOf===baseIndexOf,seen=callback||isLarge?getArray():result;if(isLarge){var cache=createCache(seen);indexOf=cacheIndexOf;seen=cache}while(++index":">",'"':""","'":"'"};var htmlUnescapes=invert(htmlEscapes);var reEscapedHtml=RegExp("("+keys(htmlUnescapes).join("|")+")","g"),reUnescapedHtml=RegExp("["+keys(htmlEscapes).join("")+"]","g");var assign=function(object,source,guard){var index,iterable=object,result=iterable;if(!iterable)return result;var args=arguments,argsIndex=0,argsLength=typeof guard=="number"?2:args.length;if(argsLength>3&&typeof args[argsLength-2]=="function"){var callback=baseCreateCallback(args[--argsLength-1],args[argsLength--],2)}else if(argsLength>2&&typeof args[argsLength-1]=="function"){callback=args[--argsLength]}while(++argsIndex3&&typeof args[length-2]=="function"){var callback=baseCreateCallback(args[--length-1],args[length--],2)}else if(length>2&&typeof args[length-1]=="function"){callback=args[--length]}var sources=slice(arguments,1,length),index=-1,stackA=getArray(),stackB=getArray();while(++index-1}else if(typeof length=="number"){result=(isString(collection)?collection.indexOf(target,fromIndex):indexOf(collection,target,fromIndex))>-1}else{forOwn(collection,function(value){if(++index>=fromIndex){return!(result=value===target)}})}return result}var 
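baseIsEqual above threads a pair of stacks (stackA/stackB) through the recursion so that circular structures compare without infinite descent; it backs the public _.isEqual. A small sketch, assuming the lodash build this bundle vendors is reachable as require("lodash"):

    var _ = require("lodash");
    var a = { nodes: ["x", "y"], meta: { rank: 1 } };
    var b = { nodes: ["x", "y"], meta: { rank: 1 } };
    // _.isEqual(a, b) => true     structural comparison, not reference identity
    var c = {}; c.self = c;
    var d = {}; d.self = d;
    // _.isEqual(c, d) => true     the stacks recognise the cycle instead of recursing forever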
countBy=createAggregator(function(result,value,key){hasOwnProperty.call(result,key)?result[key]++:result[key]=1});function every(collection,callback,thisArg){var result=true;callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++indexresult){result=value}}}else{callback=callback==null&&isString(collection)?charAtCallback:lodash.createCallback(callback,thisArg,3);forEach(collection,function(value,index,collection){var current=callback(value,index,collection);if(current>computed){computed=current;result=value}})}return result}function min(collection,callback,thisArg){var computed=Infinity,result=computed;if(typeof callback!="function"&&thisArg&&thisArg[callback]===collection){callback=null}if(callback==null&&isArray(collection)){var index=-1,length=collection.length;while(++index=largeArraySize&&createCache(argsIndex?args[argsIndex]:seen))}}var array=args[0],index=-1,length=array?array.length:0,result=[];outer:while(++index>>1;callback(array[mid])1?arguments:arguments[0],index=-1,length=array?max(pluck(array,"length")):0,result=Array(length<0?0:length);while(++index2?createWrapper(func,17,slice(arguments,2),null,thisArg):createWrapper(func,1,null,null,thisArg)}function bindAll(object){var funcs=arguments.length>1?baseFlatten(arguments,true,false,1):functions(object),index=-1,length=funcs.length;while(++index2?createWrapper(key,19,slice(arguments,2),null,object):createWrapper(key,3,null,null,object)}function compose(){var funcs=arguments,length=funcs.length;while(length--){if(!isFunction(funcs[length])){throw new TypeError}}return function(){var args=arguments,length=funcs.length;while(length--){args=[funcs[length].apply(this,args)]}return args[0]}}function curry(func,arity){arity=typeof arity=="number"?arity:+arity||func.length;return createWrapper(func,4,null,null,null,arity)}function debounce(func,wait,options){var args,maxTimeoutId,result,stamp,thisArg,timeoutId,trailingCall,lastCalled=0,maxWait=false,trailing=true;if(!isFunction(func)){throw new TypeError}wait=nativeMax(0,wait)||0;if(options===true){var leading=true;trailing=false}else if(isObject(options)){leading=options.leading;maxWait="maxWait"in options&&(nativeMax(wait,options.maxWait)||0);trailing="trailing"in options?options.trailing:trailing}var delayed=function(){var remaining=wait-(now()-stamp);if(remaining<=0){if(maxTimeoutId){clearTimeout(maxTimeoutId); - -}var isCalled=trailingCall;maxTimeoutId=timeoutId=trailingCall=undefined;if(isCalled){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}}else{timeoutId=setTimeout(delayed,remaining)}};var maxDelayed=function(){if(timeoutId){clearTimeout(timeoutId)}maxTimeoutId=timeoutId=trailingCall=undefined;if(trailing||maxWait!==wait){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}};return function(){args=arguments;stamp=now();thisArg=this;trailingCall=trailing&&(timeoutId||!leading);if(maxWait===false){var leadingCall=leading&&!timeoutId}else{if(!maxTimeoutId&&!leading){lastCalled=stamp}var remaining=maxWait-(stamp-lastCalled),isCalled=remaining<=0;if(isCalled){if(maxTimeoutId){maxTimeoutId=clearTimeout(maxTimeoutId)}lastCalled=stamp;result=func.apply(thisArg,args)}else if(!maxTimeoutId){maxTimeoutId=setTimeout(maxDelayed,remaining)}}if(isCalled&&timeoutId){timeoutId=clearTimeout(timeoutId)}else 
if(!timeoutId&&wait!==maxWait){timeoutId=setTimeout(delayed,wait)}if(leadingCall){isCalled=true;result=func.apply(thisArg,args)}if(isCalled&&!timeoutId&&!maxTimeoutId){args=thisArg=null}return result}}function defer(func){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,1);return setTimeout(function(){func.apply(undefined,args)},1)}function delay(func,wait){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,2);return setTimeout(function(){func.apply(undefined,args)},wait)}function memoize(func,resolver){if(!isFunction(func)){throw new TypeError}var memoized=function(){var cache=memoized.cache,key=resolver?resolver.apply(this,arguments):keyPrefix+arguments[0];return hasOwnProperty.call(cache,key)?cache[key]:cache[key]=func.apply(this,arguments)};memoized.cache={};return memoized}function once(func){var ran,result;if(!isFunction(func)){throw new TypeError}return function(){if(ran){return result}ran=true;result=func.apply(this,arguments);func=null;return result}}function partial(func){return createWrapper(func,16,slice(arguments,1))}function partialRight(func){return createWrapper(func,32,null,slice(arguments,1))}function throttle(func,wait,options){var leading=true,trailing=true;if(!isFunction(func)){throw new TypeError}if(options===false){leading=false}else if(isObject(options)){leading="leading"in options?options.leading:leading;trailing="trailing"in options?options.trailing:trailing}debounceOptions.leading=leading;debounceOptions.maxWait=wait;debounceOptions.trailing=trailing;return debounce(func,wait,debounceOptions)}function wrap(value,wrapper){return createWrapper(wrapper,16,[value])}function constant(value){return function(){return value}}function createCallback(func,thisArg,argCount){var type=typeof func;if(func==null||type=="function"){return baseCreateCallback(func,thisArg,argCount)}if(type!="object"){return property(func)}var props=keys(func),key=props[0],a=func[key];if(props.length==1&&a===a&&!isObject(a)){return function(object){var b=object[key];return a===b&&(a!==0||1/a==1/b)}}return function(object){var length=props.length,result=false;while(length--){if(!(result=baseIsEqual(object[props[length]],func[props[length]],null,true))){break}}return result}}function escape(string){return string==null?"":String(string).replace(reUnescapedHtml,escapeHtmlChar)}function identity(value){return value}function mixin(object,source,options){var chain=true,methodNames=source&&functions(source);if(!source||!options&&!methodNames.length){if(options==null){options=source}ctor=lodashWrapper;source=object;object=lodash;methodNames=functions(source)}if(options===false){chain=false}else if(isObject(options)&&"chain"in options){chain=options.chain}var ctor=object,isFunc=isFunction(ctor);forEach(methodNames,function(methodName){var func=object[methodName]=source[methodName];if(isFunc){ctor.prototype[methodName]=function(){var chainAll=this.__chain__,value=this.__wrapped__,args=[value];push.apply(args,arguments);var result=func.apply(object,args);if(chain||chainAll){if(value===result&&isObject(result)){return this}result=new ctor(result);result.__chain__=chainAll}return result}}})}function noConflict(){context._=oldDash;return this}function noop(){}var now=isNative(now=Date.now)&&now||function(){return(new Date).getTime()};var parseInt=nativeParseInt(whitespace+"08")==8?nativeParseInt:function(value,radix){return nativeParseInt(isString(value)?value.replace(reLeadingSpacesAndZeros,""):value,radix||0)};function property(key){return function(object){return 
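debounce, throttle and memoize above are the familiar lodash helpers; note that throttle is implemented here as debounce with maxWait = wait, and memoize keys its cache on the first argument unless a resolver is supplied. A usage sketch with the same assumption about require("lodash"):

    var _ = require("lodash");
    var square = _.memoize(function (n) { return n * n; });
    square(4);                                   // computed and cached
    square(4);                                   // served from memoized.cache
    var onResize = _.debounce(function () { /* re-run layout */ }, 150);
    // repeated calls collapse into one invocation 150ms after the last call
    var onScroll = _.throttle(function () { /* redraw */ }, 100);
    // at most one invocation per 100ms while events keep firing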
[minified JavaScript elided: this span of the patch carries a bundled, minified lodash build (apparently part of the d3/dagre-d3 assets used by the DAG visualization); the minified source was garbled beyond recovery in extraction. The readable portion of the patch resumes with the spark-dag-viz.js hunks below.]
   var escaped_dot = dot
     .replace(/&lt;/g, "\\<")
     .replace(/&gt;/g, "\\>")
     .replace(/&quot;/g, "\\\"");
   var g = graphlibDot.read(escaped_dot);
-  g.graph().rankSep = rankSep;
   var renderer = new dagreD3.render();
+  preprocessGraphLayout(g, forJob);
   renderer(container, g);
 }
@@ -251,50 +249,37 @@ function graphContainer() { return d3.select("#dag-viz-graph"); }
 function metadataContainer() { return d3.select("#dag-viz-metadata"); }
 
 /*
- * Helper function to create draw a label for each cluster.
- *
- * We need to do this manually because dagre-d3 does not support labeling clusters.
- * In general, the clustering support for dagre-d3 is quite limited at this point.
+ * Helper function to pre-process the graph layout.
+ * This step is necessary for certain styles that affect the positioning
+ * and sizes of graph elements, e.g. padding, font style, shape.
  */
-function drawClusterLabels(svgContainer, forJob) {
-  var clusterLabelSize, stageClusterLabelSize;
+function preprocessGraphLayout(g, forJob) {
+  var nodes = g.nodes();
+  for (var i = 0; i < nodes.length; i++) {
+    var isCluster = g.children(nodes[i]).length > 0;
+    if (!isCluster) {
+      var node = g.node(nodes[i]);
+      if (forJob) {
+        // Do not display RDD name on job page
+        node.shape = "circle";
+        node.labelStyle = "font-size: 0px";
+      } else {
+        node.labelStyle = "font-size: 12px";
+      }
+      node.padding = "5";
+    }
+  }
+  // Curve the edges
+  var edges = g.edges();
+  for (var j = 0; j < edges.length; j++) {
+    edges[j].lineInterpolate = "basis";
+  }
+  // Adjust vertical separation between nodes
   if (forJob) {
-    clusterLabelSize = JobPageVizConstants.clusterLabelSize;
-    stageClusterLabelSize = JobPageVizConstants.stageClusterLabelSize;
+    g.graph().rankSep = JobPageVizConstants.rankSep;
   } else {
-    clusterLabelSize = StagePageVizConstants.clusterLabelSize;
-    stageClusterLabelSize = StagePageVizConstants.stageClusterLabelSize;
+    g.graph().rankSep = StagePageVizConstants.rankSep;
   }
-  svgContainer.selectAll("g.cluster").each(function() {
-    var cluster = d3.select(this);
-    var isStage = cluster.attr("id").indexOf(VizConstants.stageClusterPrefix) > -1;
-    var labelSize = isStage ? stageClusterLabelSize : clusterLabelSize;
-    drawClusterLabel(cluster, labelSize);
-  });
-}
-
-/*
- * Helper function to draw a label for the given cluster element based on its name.
- *
- * In the process, we need to expand the bounding box to make room for the label.
- * We need to do this because dagre-d3 did not take this into account when it first
- * rendered the bounding boxes. Note that this means we need to adjust the view box
- * of the SVG afterwards since we shifted a few boxes around.
- */
-function drawClusterLabel(d3cluster, fontSize) {
-  var cluster = d3cluster;
-  var rect = d3cluster.select("rect");
-  rect.attr("y", toFloat(rect.attr("y")) - fontSize);
-  rect.attr("height", toFloat(rect.attr("height")) + fontSize);
-  var labelX = toFloat(rect.attr("x")) + toFloat(rect.attr("width")) - fontSize / 2;
-  var labelY = toFloat(rect.attr("y")) + fontSize * 1.5;
-  var labelText = cluster.attr("name").replace(VizConstants.clusterPrefix, "");
-  cluster.append("text")
-    .attr("x", labelX)
-    .attr("y", labelY)
-    .attr("text-anchor", "end")
-    .style("font-size", fontSize + "px")
-    .text(labelText);
-}
 
 /*
@@ -444,7 +429,7 @@ function addTooltipsForRDDs(svgContainer) {
     if (tooltipText) {
       node.select("circle")
         .attr("data-toggle", "tooltip")
-        .attr("data-placement", "right")
+        .attr("data-placement", "bottom")
        .attr("title", tooltipText)
     }
   });
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 6a0f5c5d16daa..441c97d6d3e2a 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -360,7 +360,7 @@ private[spark] object UIUtils extends Logging {
       {
         graphs.map { g =>
        }
-    val helpButton: Seq[Node] = helpText.map { helpText =>
-
-        (?)
-
-    }.getOrElse(Seq.empty)
+    val helpButton: Seq[Node] = helpText.map(tooltip(_, "bottom")).getOrElse(Seq.empty)
@@ -375,6 +371,12 @@
   }
 
+  def tooltip(text: String, position: String): Seq[Node] = {
+
+      (?)
+
+  }
+
   /** Return a script element that automatically expands the DAG visualization on page load. */
   def expandDagVizOnLoad(forJob: Boolean): Seq[Node] = {