Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-6418] Add simple per-stage visualization to the UI [WIP] #5547

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions core/src/main/resources/org/apache/spark/ui/static/d3.min.js

Large diffs are not rendered by default.

Large diffs are not rendered by default.

118 changes: 118 additions & 0 deletions core/src/main/resources/org/apache/spark/ui/static/jobs-graph.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
function renderJobsGraphs(data) {
/* show visualization toggle */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use 2 spaces for indents throughout all javascript files, instead of tab characters?

$(".expand-visualization-arrow").toggleClass('arrow-closed');
$(".expand-visualization-arrow").toggleClass('arrow-open');
if ($(".expand-visualization-arrow").hasClass("arrow-closed")) {
$("#chartContainer").empty();
return;
}

/* no data to graph */
if (!Object.keys(data).length) {
return;
}

/* format data to a form readable by dimple.js */
var tableData = [];
for (var k in data) {
var arr = (data[k]).split(",");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you pass each of the inputs as lists, I think you won't need to do this.

data[k] = arr;
}
var startTime = getMin(data["launchtime"]);
var numTasks = Math.min(1000, data[k].length);

/*data update */
data["launchtime"] = data["launchtime"].map(function (launchTime) {return launchTime-startTime;});
var maxTime = 0;
for (i = 0; i < numTasks; i++) {
var time = 0;
for (var key in data) {
time += parseFloat(data[key][i]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this might be pretty slow when there are thousands of tasks - if so, sending proper double types in JSON would be faster.

}
maxTime = Math.max(time, maxTime);
}
divisorAndTitle = getDivisiorAndTitle(maxTime);
for (i = 0; i < numTasks; i++) {
for (var key in data) {
job = {};
job["Task #"] = i;
job["Task"] = key;
job["Time"] = parseFloat(data[key][i])/divisorAndTitle[0];
tableData.push(job);
}
}

var height = Math.max(Math.min(numTasks * 50, 2000), 200);
var svg = dimple.newSvg("#chartContainer", "100%", height);
var chart = new dimple.chart(svg);
chart.setMargins(60, 60, 60, 60);

var x = chart.addMeasureAxis("x", "Time");
x.fontSize = "12px";
x.title = divisorAndTitle[1];

var y = chart.addCategoryAxis("y", "Task #");
y.fontSize = "12px";

var s = chart.addSeries("Task", dimple.plot.bar);
s.data = tableData;
s.addOrderRule(getOrderRule());

s.getTooltipText = function (dat) {
return ["Task #: " + dat["yField"][0],
"Phase: " + dat["aggField"][0],
"Time (ms): " + dat["xValue"]*divisorAndTitle[0]
];
};

chart.addLegend(20, 10, 1000, 40, "right");
(chart.legends[0]).fontSize = "12px";

chart.draw();
svg.selectAll(".dimple-launchtime").remove();
numTicks(y, Math.floor(numTasks/20));
}

function getMin(arr) {
return Math.min.apply(null, arr);
}

function getOrderRule() {
return ["launchtime", "Scheduler Delay", "Task Deserialization Time",
"Duration", "Result Serialization Time", "Getting Result Time", "GC Time"];
}

function getDivisiorAndTitle(maxTime) {
var sec = 1000;
var min = sec * 60;
var hr = min * 60;
if (maxTime >= hr) {
return [hr, "Time (hr)"];
} else if (maxTime >= min) {
return [min, "Time (min)"];
} else if (maxTime >= sec) {
return [sec, "Time (s)"];
} else {
return [1, "Time (ms)"];
}
}

/* limits the number of ticks in the Y-axis to oneInEvery */
function numTicks(axis, oneInEvery) {
if (axis.shapes.length > 0) {
var del = 0;
if (oneInEvery > 1) {
axis.shapes.selectAll("text").each(function (d) {
if (del % oneInEvery !== 0) {
this.remove();
axis.shapes.selectAll("line").each(function (d2) {
if (d === d2) {
this.remove();
}
});
}
del += 1;
});
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ pre {
border: none;
}

span.expand-additional-metrics {
span.expand-additional-metrics, span.expand-visualization {
cursor: pointer;
}

Expand Down
3 changes: 3 additions & 0 deletions core/src/main/scala/org/apache/spark/ui/UIUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ private[spark] object UIUtils extends Logging {
<script src={prependBaseUri("/static/initialize-tooltips.js")}></script>
<script src={prependBaseUri("/static/table.js")}></script>
<script src={prependBaseUri("/static/additional-metrics.js")}></script>
<script src={prependBaseUri("/static/d3.min.js")}></script>
<script src={prependBaseUri("/static/dimple.min.js")}></script>
<script src={prependBaseUri("/static/jobs-graph.js")}></script>
}

/** Returns a spark page with correctly formatted headers */
Expand Down
34 changes: 34 additions & 0 deletions core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {

val stageData = stageDataOption.get
val tasks = stageData.taskData.values.toSeq.sortBy(_.taskInfo.launchTime)
val graphData = scala.collection.mutable.Map[String, String]()

val numCompleted = tasks.count(_.taskInfo.finished)
val accumulables = listener.stageIdToData((stageId, stageAttemptId)).accumulables
Expand Down Expand Up @@ -234,6 +235,8 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
val deserializationTimes = validTasks.map { case TaskUIData(_, metrics, _) =>
metrics.get.executorDeserializeTime.toDouble
}
graphData("Task Deserialization Time") = deserializationTimes.mkString(",")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than creating your own string representation of a list, can do the necessary conversation to pass proper JSON lists here?


val deserializationQuantiles =
<td>
<span data-toggle="tooltip" title={ToolTips.TASK_DESERIALIZATION_TIME}
Expand All @@ -250,6 +253,8 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
val gcTimes = validTasks.map { case TaskUIData(_, metrics, _) =>
metrics.get.jvmGCTime.toDouble
}
graphData("GC Time") = gcTimes.mkString(",")

val gcQuantiles =
<td>
<span data-toggle="tooltip"
Expand All @@ -260,6 +265,8 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
val serializationTimes = validTasks.map { case TaskUIData(_, metrics, _) =>
metrics.get.resultSerializationTime.toDouble
}
graphData("Result Serialization Time") = serializationTimes.mkString(",")

val serializationQuantiles =
<td>
<span data-toggle="tooltip"
Expand All @@ -271,6 +278,8 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
val gettingResultTimes = validTasks.map { case TaskUIData(info, _, _) =>
getGettingResultTime(info).toDouble
}
graphData("Getting Result Time") = gettingResultTimes.mkString(",")

val gettingResultQuantiles =
<td>
<span data-toggle="tooltip"
Expand All @@ -285,6 +294,18 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
val schedulerDelays = validTasks.map { case TaskUIData(info, metrics, _) =>
getSchedulerDelay(info, metrics.get).toDouble
}
graphData("Scheduler Delay") = schedulerDelays.mkString(",")

val launchTimes = validTasks.map { case TaskUIData(info, metrics, _) =>
info.launchTime
}
graphData("launchtime") = launchTimes.mkString(",")

val durations = validTasks.map { case TaskUIData(info, metrics, _) => if (info.status == "RUNNING")
info.timeRunning(System.currentTimeMillis()) else metrics.map(_.executorRunTime).getOrElse(1L)
}
graphData("Duration") = durations.mkString(",")

val schedulerDelayTitle = <td><span data-toggle="tooltip"
title={ToolTips.SCHEDULER_DELAY} data-placement="right">Scheduler Delay</span></td>
val schedulerDelayQuantiles = schedulerDelayTitle +:
Expand Down Expand Up @@ -431,13 +452,26 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
val maybeAccumulableTable: Seq[Node] =
if (accumulables.size > 0) { <h4>Accumulators</h4> ++ accumulableTable } else Seq()

val showVisualization =
<div>
<span class="expand-visualization" onclick="render();">
<span class="expand-visualization-arrow arrow-closed"></span>
<strong>Show Visualization</strong>
</span>
<div id="chartContainer" class="container"></div>
<script type="text/javascript">
{Unparsed(s"function render() {renderJobsGraphs(${scala.util.parsing.json.JSONObject(graphData.toMap).toString()})}")}
</script>
</div>

val content =
summary ++
showAdditionalMetrics ++
<h4>Summary Metrics for {numCompleted} Completed Tasks</h4> ++
<div>{summaryTable.getOrElse("No tasks have reported metrics yet.")}</div> ++
<h4>Aggregated Metrics by Executor</h4> ++ executorTable.toNodeSeq ++
maybeAccumulableTable ++
showVisualization ++
<h4>Tasks</h4> ++ taskTable

UIUtils.headerSparkPage("Details for Stage %d".format(stageId), content, parent)
Expand Down