diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
index 41758fc16a72a..90c6b025dfff0 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
@@ -53,8 +53,9 @@
var VizConstants = {
rddColor: "#444444",
+ rddCachedColor: "#7DDD00",
+ rddOperationColor: "#AADFFF",
stageColor: "#FFDDEE",
- operationScopeColor: "#AADFFF",
clusterLabelColor: "#888888",
edgeColor: "#444444",
edgeWidth: "1.5px",
@@ -125,6 +126,12 @@ function renderDagViz(forJob) {
renderDagVizForStage(svg);
}
+ // Find cached RDDs
+ metadataContainer().selectAll(".cached-rdd").each(function(v) {
+ var nodeId = VizConstants.nodePrefix + d3.select(this).text();
+ graphContainer().selectAll("#" + nodeId).classed("cached", true);
+ });
+
// Set the appropriate SVG dimensions to ensure that all elements are displayed
var boundingBox = svg.node().getBBox();
svg.style("width", (boundingBox.width + VizConstants.svgMarginX) + "px");
@@ -240,7 +247,7 @@ function renderDot(dot, container) {
function styleDagViz(forJob) {
graphContainer().selectAll("svg g.cluster rect")
.style("fill", "white")
- .style("stroke", VizConstants.operationScopeColor)
+ .style("stroke", VizConstants.rddOperationColor)
.style("stroke-width", "4px")
.style("stroke-opacity", "0.5");
graphContainer().selectAll("svg g.cluster text")
@@ -279,6 +286,9 @@ function styleDagViz(forJob) {
function styleDagVizForJob() {
graphContainer().selectAll("svg g.node circle")
.style("fill", VizConstants.rddColor);
+ // TODO: add a legend to explain what a highlighted dot means
+ graphContainer().selectAll("svg g.cached circle")
+ .style("fill", VizConstants.rddCachedColor);
graphContainer().selectAll("svg g#cross-stage-edges path")
.style("fill", "none");
}
@@ -289,6 +299,9 @@ function styleDagVizForStage() {
.style("fill", "none")
.style("stroke", VizConstants.rddColor)
.style("stroke-width", "2px");
+ // TODO: add a legend to explain what a highlighted RDD means
+ graphContainer().selectAll("svg g.cached rect")
+ .style("stroke", VizConstants.rddCachedColor);
graphContainer().selectAll("svg g.node g.label text tspan")
.style("fill", VizConstants.rddColor);
}
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 08917caa30849..2f3fb181e4026 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -362,6 +362,11 @@ private[spark] object UIUtils extends Logging {
{RDDOperationGraph.makeDotFile(g, forJob)}
{ g.incomingEdges.map { e => <div class="incoming-edge">{e.fromId},{e.toId}</div>
} }
{ g.outgoingEdges.map { e => <div class="outgoing-edge">{e.fromId},{e.toId}</div>
} }
+ {
+ g.rootCluster.getAllNodes.filter(_.cached).map { n =>
+ <div class="cached-rdd">{n.id}</div>
+ }
+ }
}
}
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
index a7ea12b1655fe..f6abf27db49dd 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
@@ -179,7 +179,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") {
- Event Timeline
+ Event timeline
++
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
index aa7156dd83657..96cc3d78d0f15 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
@@ -161,7 +161,7 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") {
- Event Timeline
+ Event timeline
++
diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
index 7547f071ccc02..8be14c87fef5d 100644
--- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
+++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
@@ -22,6 +22,7 @@ import scala.collection.mutable.ListBuffer
import org.apache.spark.Logging
import org.apache.spark.scheduler.StageInfo
+import org.apache.spark.storage.StorageLevel
/**
* A representation of a generic cluster graph used for storing information on RDD operations.
@@ -37,7 +38,7 @@ private[ui] case class RDDOperationGraph(
rootCluster: RDDOperationCluster)
/** A node in an RDDOperationGraph. This represents an RDD. */
-private[ui] case class RDDOperationNode(id: Int, name: String)
+private[ui] case class RDDOperationNode(id: Int, name: String, cached: Boolean)
/**
* A directed edge connecting two nodes in an RDDOperationGraph.
@@ -61,6 +62,11 @@ private[ui] class RDDOperationCluster(val id: String, val name: String) {
def attachChildCluster(childCluster: RDDOperationCluster): Unit = {
_childrenClusters += childCluster
}
+
+ /** Return all the nodes contained in this cluster, including ones nested in other clusters. */
+ def getAllNodes: Seq[RDDOperationNode] = {
+ _childrenNodes ++ _childrenClusters.flatMap(_.childrenNodes)
+ }
}
private[ui] object RDDOperationGraph extends Logging {
@@ -90,7 +96,10 @@ private[ui] object RDDOperationGraph extends Logging {
// Find nodes, edges, and operation scopes that belong to this stage
stage.rddInfos.foreach { rdd =>
edges ++= rdd.parentIds.map { parentId => RDDOperationEdge(parentId, rdd.id) }
- val node = nodes.getOrElseUpdate(rdd.id, RDDOperationNode(rdd.id, rdd.name))
+
+ // TODO: differentiate between the intention to cache an RDD and whether it's actually cached
+ val node = nodes.getOrElseUpdate(
+ rdd.id, RDDOperationNode(rdd.id, rdd.name, rdd.storageLevel != StorageLevel.NONE))
if (rdd.scope == null) {
// This RDD has no encompassing scope, so we put it directly in the root cluster