Skip to content

Commit

Permalink
removing driver, improving recommendations and renaming files
Browse files Browse the repository at this point in the history
  • Loading branch information
swasti committed Jan 5, 2018
1 parent b2e23a4 commit fa62ddf
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 37 deletions.
6 changes: 3 additions & 3 deletions app-conf/HeuristicConf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,9 @@
</heuristic>
<heuristic>
<applicationtype>spark</applicationtype>
<heuristicname>Spark GC Time to Run Time</heuristicname>
<classname>com.linkedin.drelephant.spark.heuristics.GcCpuTimeHeuristic</classname>
<viewname>views.html.help.spark.helpGcCpuTimeHeuristic</viewname>
<heuristicname>Executor GC</heuristicname>
<classname>com.linkedin.drelephant.spark.heuristics.ExecutorGcHeuristic</classname>
<viewname>views.html.help.spark.helpExecutorGcHeuristic</viewname>
</heuristic>

</heuristics>
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ import com.linkedin.drelephant.spark.data.SparkApplicationData
import scala.collection.JavaConverters

/**
* A heuristic based on GC time and CPU run time
* A heuristic based on GC time and executor run time. It calculates the ratio of the total time the executors spend in GC to their total run time and warns if too much time is spent in GC.
*/
class GcCpuTimeHeuristic(private val heuristicConfigurationData: HeuristicConfigurationData)
class ExecutorGcHeuristic(private val heuristicConfigurationData: HeuristicConfigurationData)
extends Heuristic[SparkApplicationData] {

import GcCpuTimeHeuristic._
import ExecutorGcHeuristic._
import JavaConverters._

val gcSeverityAThresholds: SeverityThresholds =
Expand All @@ -47,31 +47,31 @@ class GcCpuTimeHeuristic(private val heuristicConfigurationData: HeuristicConfig
val evaluator = new Evaluator(this, data)
var resultDetails = Seq(
new HeuristicResultDetails("GC time to Executor Run time ratio", evaluator.ratio.toString),
new HeuristicResultDetails("GC total time", evaluator.jvmTime.toString),
new HeuristicResultDetails("Executor Run time", evaluator.executorRunTimeTotal.toString)
new HeuristicResultDetails("Total GC time", evaluator.jvmTime.toString),
new HeuristicResultDetails("Total Executor Runtime", evaluator.executorRunTimeTotal.toString)
)

//adding recommendations to the result, severityTimeA corresponds to the ascending severity calculation
if (evaluator.severityTimeA.getValue > Severity.LOW.getValue) {
resultDetails = resultDetails :+ new HeuristicResultDetails("Note", "The ratio of JVM GC Time and executor Time is above normal, we recommend to increase the executor memory")
resultDetails = resultDetails :+ new HeuristicResultDetails("Gc ratio high", "The job is spending too much time on GC. We recommend increasing the executor memory.")
}
//severityTimeD corresponds to the descending severity calculation
if (evaluator.severityTimeD.getValue > Severity.LOW.getValue) {
resultDetails = resultDetails :+ new HeuristicResultDetails("Note", "The ratio of JVM GC Time and executor Time is below normal, we recommend to decrease the executor memory")
resultDetails = resultDetails :+ new HeuristicResultDetails("Gc ratio low", "The job is spending too less time in GC. Please check if you have asked for more executor memory than required.")
}

val result = new HeuristicResult(
heuristicConfigurationData.getClassName,
heuristicConfigurationData.getHeuristicName,
evaluator.severity,
evaluator.severityTimeA,
0,
resultDetails.asJava
)
result
}
}

object GcCpuTimeHeuristic {
object ExecutorGcHeuristic {
val SPARK_EXECUTOR_MEMORY = "spark.executor.memory"
val SPARK_EXECUTOR_CORES = "spark.executor.cores"

Expand All @@ -88,17 +88,17 @@ object GcCpuTimeHeuristic {
val GC_SEVERITY_A_THRESHOLDS_KEY: String = "gc_severity_A_threshold"
val GC_SEVERITY_D_THRESHOLDS_KEY: String = "gc_severity_D_threshold"

class Evaluator(gcCpuTimeHeuristic: GcCpuTimeHeuristic, data: SparkApplicationData) {
lazy val executorSummaries: Seq[ExecutorSummary] = data.executorSummaries
class Evaluator(executorGcHeuristic: ExecutorGcHeuristic, data: SparkApplicationData) {
lazy val executorAndDriverSummaries: Seq[ExecutorSummary] = data.executorSummaries
lazy val executorSummaries: Seq[ExecutorSummary] = executorAndDriverSummaries.filterNot(_.id.equals("driver"))
lazy val appConfigurationProperties: Map[String, String] =
data.appConfigurationProperties
var (jvmTime, executorRunTimeTotal) = getTimeValues(executorSummaries)

var ratio: Double = jvmTime.toDouble / executorRunTimeTotal.toDouble

lazy val severityTimeA: Severity = gcCpuTimeHeuristic.gcSeverityAThresholds.severityOf(ratio)
lazy val severityTimeD: Severity = gcCpuTimeHeuristic.gcSeverityAThresholds.severityOf(ratio)
lazy val severity : Severity = Severity.max(severityTimeA, severityTimeD)
lazy val severityTimeA: Severity = executorGcHeuristic.gcSeverityAThresholds.severityOf(ratio)
lazy val severityTimeD: Severity = executorGcHeuristic.gcSeverityDThresholds.severityOf(ratio)

/**
* returns the total JVM GC Time and total executor Run Time across all stages
Expand Down
20 changes: 20 additions & 0 deletions app/views/help/spark/helpExecutorGcHeuristic.scala.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
@*
* Copyright 2016 LinkedIn Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*@

<p>This analysis shows how much time a job is spending in GC. To normalise the results across all jobs, the ratio of the time a job spends in GC to the total run time of the job is calculated.</p>
<p>A job is flagged if the ratio is too high, meaning the job spends too much time in GC.</p>
<h3>Suggestions</h3>
<p>We recommend increasing the executor memory.</p>
12 changes: 0 additions & 12 deletions app/views/help/spark/helpGcCpuTimeHeuristic.scala.html

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,18 @@ import org.scalatest.{FunSpec, Matchers}
import scala.concurrent.duration.Duration


class GcCpuTimeHeuristicTest extends FunSpec with Matchers {
import GcCpuTimeHeuristicTest._
class ExecutorGcHeuristicTest extends FunSpec with Matchers {
import ExecutorGcHeuristicTest._

describe("GcCpuTimeHeuristic") {
describe("ExecutorGcHeuristic") {
val heuristicConfigurationData = newFakeHeuristicConfigurationData(
Map(
"max_to_median_ratio_severity_thresholds" -> "1.414,2,4,16",
"ignore_max_bytes_less_than_threshold" -> "4000000",
"ignore_max_millis_less_than_threshold" -> "4000001"
)
)
val gcCpuTimeHeuristic = new GcCpuTimeHeuristic(heuristicConfigurationData)
val executorGcHeuristic = new ExecutorGcHeuristic(heuristicConfigurationData)

val executorSummaries = Seq(
newFakeExecutorSummary(
Expand All @@ -65,7 +65,7 @@ class GcCpuTimeHeuristicTest extends FunSpec with Matchers {

describe(".apply") {
val data1 = newFakeSparkApplicationData(executorSummaries)
val heuristicResult = gcCpuTimeHeuristic.apply(data1)
val heuristicResult = executorGcHeuristic.apply(data1)
val heuristicResultDetails = heuristicResult.getHeuristicResultDetails

it("returns the severity") {
Expand All @@ -80,20 +80,20 @@ class GcCpuTimeHeuristicTest extends FunSpec with Matchers {

it("returns the total GC time") {
val details = heuristicResultDetails.get(1)
details.getName should include("GC total time")
details.getName should include("Total GC time")
details.getValue should be("1200000")
}

it("returns the executor's run time") {
val details = heuristicResultDetails.get(2)
details.getName should include("Executor Run time")
details.getName should include("Total Executor Runtime")
details.getValue should be("4740000")
}
}
}
}

object GcCpuTimeHeuristicTest {
object ExecutorGcHeuristicTest {
import JavaConverters._

def newFakeHeuristicConfigurationData(params: Map[String, String] = Map.empty): HeuristicConfigurationData =
Expand Down

0 comments on commit fa62ddf

Please sign in to comment.