diff --git a/app-conf/HeuristicConf.xml b/app-conf/HeuristicConf.xml index 5a9ba82e8..3e264e853 100644 --- a/app-conf/HeuristicConf.xml +++ b/app-conf/HeuristicConf.xml @@ -195,9 +195,9 @@ spark - Spark GC Time to Run Time - com.linkedin.drelephant.spark.heuristics.GcCpuTimeHeuristic - views.html.help.spark.helpGcCpuTimeHeuristic + Executor GC + com.linkedin.drelephant.spark.heuristics.ExecutorGcHeuristic + views.html.help.spark.helpExecutorGcHeuristic diff --git a/app/com/linkedin/drelephant/spark/heuristics/GcCpuTimeHeuristic.scala b/app/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristic.scala similarity index 77% rename from app/com/linkedin/drelephant/spark/heuristics/GcCpuTimeHeuristic.scala rename to app/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristic.scala index b547adb8f..c3658f168 100644 --- a/app/com/linkedin/drelephant/spark/heuristics/GcCpuTimeHeuristic.scala +++ b/app/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristic.scala @@ -25,12 +25,12 @@ import com.linkedin.drelephant.spark.data.SparkApplicationData import scala.collection.JavaConverters /** - * A heuristic based on GC time and CPU run time + * A heuristic based on GC time and CPU run time. It calculates the ratio of the total time a job spends in GC to the total run time of a job and warns if too much time is spent in GC. 
 */ -class GcCpuTimeHeuristic(private val heuristicConfigurationData: HeuristicConfigurationData) +class ExecutorGcHeuristic(private val heuristicConfigurationData: HeuristicConfigurationData) extends Heuristic[SparkApplicationData] { - import GcCpuTimeHeuristic._ + import ExecutorGcHeuristic._ import JavaConverters._ val gcSeverityAThresholds: SeverityThresholds = @@ -47,23 +47,23 @@ class GcCpuTimeHeuristic(private val heuristicConfig val evaluator = new Evaluator(this, data) var resultDetails = Seq( new HeuristicResultDetails("GC time to Executor Run time ratio", evaluator.ratio.toString), - new HeuristicResultDetails("GC total time", evaluator.jvmTime.toString), - new HeuristicResultDetails("Executor Run time", evaluator.executorRunTimeTotal.toString) + new HeuristicResultDetails("Total GC time", evaluator.jvmTime.toString), + new HeuristicResultDetails("Total Executor Runtime", evaluator.executorRunTimeTotal.toString) ) //adding recommendations to the result, severityTimeA corresponds to the ascending severity calculation if (evaluator.severityTimeA.getValue > Severity.LOW.getValue) { - resultDetails = resultDetails :+ new HeuristicResultDetails("Note", "The ratio of JVM GC Time and executor Time is above normal, we recommend to increase the executor memory") + resultDetails = resultDetails :+ new HeuristicResultDetails("Gc ratio high", "The job is spending too much time on GC. We recommend increasing the executor memory.") } //severityTimeD corresponds to the descending severity calculation if (evaluator.severityTimeD.getValue > Severity.LOW.getValue) { - resultDetails = resultDetails :+ new HeuristicResultDetails("Note", "The ratio of JVM GC Time and executor Time is below normal, we recommend to decrease the executor memory") + resultDetails = resultDetails :+ new HeuristicResultDetails("Gc ratio low", "The job is spending too little time in GC. 
Please check if you have asked for more executor memory than required.") } val result = new HeuristicResult( heuristicConfigurationData.getClassName, heuristicConfigurationData.getHeuristicName, - evaluator.severity, + evaluator.severityTimeA, 0, resultDetails.asJava ) @@ -71,7 +71,7 @@ class GcCpuTimeHeuristic(private val heuristicConfigurationData: HeuristicConfig } } -object GcCpuTimeHeuristic { +object ExecutorGcHeuristic { val SPARK_EXECUTOR_MEMORY = "spark.executor.memory" val SPARK_EXECUTOR_CORES = "spark.executor.cores" @@ -88,17 +88,17 @@ object GcCpuTimeHeuristic { val GC_SEVERITY_A_THRESHOLDS_KEY: String = "gc_severity_A_threshold" val GC_SEVERITY_D_THRESHOLDS_KEY: String = "gc_severity_D_threshold" - class Evaluator(gcCpuTimeHeuristic: GcCpuTimeHeuristic, data: SparkApplicationData) { - lazy val executorSummaries: Seq[ExecutorSummary] = data.executorSummaries + class Evaluator(executorGcHeuristic: ExecutorGcHeuristic, data: SparkApplicationData) { + lazy val executorAndDriverSummaries: Seq[ExecutorSummary] = data.executorSummaries + lazy val executorSummaries: Seq[ExecutorSummary] = executorAndDriverSummaries.filterNot(_.id.equals("driver")) lazy val appConfigurationProperties: Map[String, String] = data.appConfigurationProperties var (jvmTime, executorRunTimeTotal) = getTimeValues(executorSummaries) var ratio: Double = jvmTime.toDouble / executorRunTimeTotal.toDouble - lazy val severityTimeA: Severity = gcCpuTimeHeuristic.gcSeverityAThresholds.severityOf(ratio) - lazy val severityTimeD: Severity = gcCpuTimeHeuristic.gcSeverityAThresholds.severityOf(ratio) - lazy val severity : Severity = Severity.max(severityTimeA, severityTimeD) + lazy val severityTimeA: Severity = executorGcHeuristic.gcSeverityAThresholds.severityOf(ratio) + lazy val severityTimeD: Severity = executorGcHeuristic.gcSeverityAThresholds.severityOf(ratio) /** * returns the total JVM GC Time and total executor Run Time across all stages diff --git 
a/app/views/help/spark/helpExecutorGcHeuristic.scala.html b/app/views/help/spark/helpExecutorGcHeuristic.scala.html new file mode 100644 index 000000000..02ca91ac7 --- /dev/null +++ b/app/views/help/spark/helpExecutorGcHeuristic.scala.html @@ -0,0 +1,20 @@ +@* +* Copyright 2016 LinkedIn Corp. +* +* Licensed under the Apache License, Version 2.0 (the "License"); you may not +* use this file except in compliance with the License. You may obtain a copy of +* the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +* License for the specific language governing permissions and limitations under +* the License. +*@ + +

This analysis shows how much time a job is spending in GC. To normalise the results across all jobs, the ratio of the time a job spends in GC to the total run time of the job is calculated.

+

A job is flagged if the ratio is too high, meaning the job spends too much time in GC.

+

Suggestions

+

We recommend increasing the executor memory.

\ No newline at end of file diff --git a/app/views/help/spark/helpGcCpuTimeHeuristic.scala.html b/app/views/help/spark/helpGcCpuTimeHeuristic.scala.html deleted file mode 100644 index 646b186d6..000000000 --- a/app/views/help/spark/helpGcCpuTimeHeuristic.scala.html +++ /dev/null @@ -1,12 +0,0 @@ -

The ratio of jvmGcTime to executorRunTime is checked, to see if GC is taking too much time (providing more memory could help) or too little time (memory may be over provisioned, and can be reduced).

-

The severity thresholds are as follows :

-

Low: avg (jvmGcTime / executorRunTime) >= .08

-

Moderate: avg (jvmGcTime / executorRunTime) >= .1

-

Critical: avg (jvmGcTime / executorRunTime) >= .15

-

Severe:avg (jvmGcTime / executorRunTime) >= .2

-

The severity thresholds in case it is taking too little time are as follows:

-

Low: avg (jvmGcTime / executorRunTime) < .05)

-

Moderate: avg (jvmGcTime / executorRunTime) < .04)

-

Critical: avg (jvmGcTime / executorRunTime) < .03)

-

Severe: avg (jvmGcTime / executorRunTime) < .01)

- diff --git a/test/com/linkedin/drelephant/spark/heuristics/ExecutorStorageSpillHeuristicTest.scala b/test/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristicTest.scala similarity index 90% rename from test/com/linkedin/drelephant/spark/heuristics/ExecutorStorageSpillHeuristicTest.scala rename to test/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristicTest.scala index 31a2d1635..869b9cb67 100644 --- a/test/com/linkedin/drelephant/spark/heuristics/ExecutorStorageSpillHeuristicTest.scala +++ b/test/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristicTest.scala @@ -27,10 +27,10 @@ import org.scalatest.{FunSpec, Matchers} import scala.concurrent.duration.Duration -class GcCpuTimeHeuristicTest extends FunSpec with Matchers { - import GcCpuTimeHeuristicTest._ +class ExecutorGcHeuristicTest extends FunSpec with Matchers { + import ExecutorGcHeuristicTest._ - describe("GcCpuTimeHeuristic") { + describe("ExecutorGcHeuristic") { val heuristicConfigurationData = newFakeHeuristicConfigurationData( Map( "max_to_median_ratio_severity_thresholds" -> "1.414,2,4,16", @@ -38,7 +38,7 @@ class GcCpuTimeHeuristicTest extends FunSpec with Matchers { "ignore_max_millis_less_than_threshold" -> "4000001" ) ) - val gcCpuTimeHeuristic = new GcCpuTimeHeuristic(heuristicConfigurationData) + val executorGcHeuristic = new ExecutorGcHeuristic(heuristicConfigurationData) val executorSummaries = Seq( newFakeExecutorSummary( @@ -65,7 +65,7 @@ class GcCpuTimeHeuristicTest extends FunSpec with Matchers { describe(".apply") { val data1 = newFakeSparkApplicationData(executorSummaries) - val heuristicResult = gcCpuTimeHeuristic.apply(data1) + val heuristicResult = executorGcHeuristic.apply(data1) val heuristicResultDetails = heuristicResult.getHeuristicResultDetails it("returns the severity") { @@ -80,20 +80,20 @@ class GcCpuTimeHeuristicTest extends FunSpec with Matchers { it("returns the total GC time") { val details = heuristicResultDetails.get(1) - details.getName should 
include("GC total time") + details.getName should include("Total GC time") details.getValue should be("1200000") } it("returns the executor's run time") { val details = heuristicResultDetails.get(2) - details.getName should include("Executor Run time") + details.getName should include("Total Executor Runtime") details.getValue should be("4740000") } } } } -object GcCpuTimeHeuristicTest { +object ExecutorGcHeuristicTest { import JavaConverters._ def newFakeHeuristicConfigurationData(params: Map[String, String] = Map.empty): HeuristicConfigurationData =