-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-32003][CORE] When external shuffle service is used, unregister outputs for executor on fetch failure after executor is lost #28848
Changes from all commits
e36b442
fca8a6e
973e385
b9e55a4
06ea411
17393eb
d450c3e
a8e619c
1923598
0e00862
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,9 @@ import scala.annotation.meta.param | |
import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} | ||
import scala.util.control.NonFatal | ||
|
||
import org.mockito.Mockito.spy | ||
import org.mockito.Mockito.times | ||
import org.mockito.Mockito.verify | ||
import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} | ||
import org.scalatest.exceptions.TestFailedException | ||
import org.scalatest.time.SpanSugar._ | ||
|
@@ -235,6 +238,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi | |
|
||
var sparkListener: EventInfoRecordingListener = null | ||
|
||
var blockManagerMaster: BlockManagerMaster = null | ||
var mapOutputTracker: MapOutputTrackerMaster = null | ||
var broadcastManager: BroadcastManager = null | ||
var securityMgr: SecurityManager = null | ||
|
@@ -248,17 +252,18 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi | |
*/ | ||
val cacheLocations = new HashMap[(Int, Int), Seq[BlockManagerId]] | ||
// stub out BlockManagerMaster.getLocations to use our cacheLocations | ||
val blockManagerMaster = new BlockManagerMaster(null, null, conf, true) { | ||
override def getLocations(blockIds: Array[BlockId]): IndexedSeq[Seq[BlockManagerId]] = { | ||
blockIds.map { | ||
_.asRDDId.map(id => (id.rddId -> id.splitIndex)).flatMap(key => cacheLocations.get(key)). | ||
getOrElse(Seq()) | ||
}.toIndexedSeq | ||
} | ||
override def removeExecutor(execId: String): Unit = { | ||
// don't need to propagate to the driver, which we don't have | ||
} | ||
class MyBlockManagerMaster(conf: SparkConf) extends BlockManagerMaster(null, null, conf, true) { | ||
override def getLocations(blockIds: Array[BlockId]): IndexedSeq[Seq[BlockManagerId]] = { | ||
blockIds.map { | ||
_.asRDDId.map { id => (id.rddId -> id.splitIndex) | ||
}.flatMap { key => cacheLocations.get(key) | ||
}.getOrElse(Seq()) | ||
Comment on lines +258 to +260
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. unnecessary change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @squito requested this change, to conform with what he said was the preferred scala style (map or flatMap with "=>" should use braces rather than parentheses). |
||
}.toIndexedSeq | ||
} | ||
override def removeExecutor(execId: String): Unit = { | ||
// don't need to propagate to the driver, which we don't have | ||
} | ||
} | ||
|
||
/** The list of results that DAGScheduler has collected. */ | ||
val results = new HashMap[Int, Any]() | ||
|
@@ -276,6 +281,16 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi | |
override def jobFailed(exception: Exception): Unit = { failure = exception } | ||
} | ||
|
||
class MyMapOutputTrackerMaster( | ||
conf: SparkConf, | ||
broadcastManager: BroadcastManager) | ||
extends MapOutputTrackerMaster(conf, broadcastManager, true) { | ||
|
||
override def sendTracker(message: Any): Unit = { | ||
// no-op, just so we can stop this to avoid leaking threads | ||
} | ||
} | ||
|
||
override def beforeEach(): Unit = { | ||
super.beforeEach() | ||
init(new SparkConf()) | ||
|
@@ -293,11 +308,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi | |
results.clear() | ||
securityMgr = new SecurityManager(conf) | ||
broadcastManager = new BroadcastManager(true, conf, securityMgr) | ||
mapOutputTracker = new MapOutputTrackerMaster(conf, broadcastManager, true) { | ||
override def sendTracker(message: Any): Unit = { | ||
// no-op, just so we can stop this to avoid leaking threads | ||
} | ||
} | ||
mapOutputTracker = spy(new MyMapOutputTrackerMaster(conf, broadcastManager)) | ||
blockManagerMaster = spy(new MyBlockManagerMaster(conf)) | ||
scheduler = new DAGScheduler( | ||
sc, | ||
taskScheduler, | ||
|
@@ -548,6 +560,56 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi | |
assert(mapStatus2(2).location.host === "hostB") | ||
} | ||
|
||
test("SPARK-32003: All shuffle files for executor should be cleaned up on fetch failure") { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit. Do we need to describe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. personally, I'm OK with this as is, I think its OK for some of the details to be down in the test itself and balance a super-duper long name. @dongjoon-hyun since you called this a nit I'm assuming you're OK with me merging this anyhow, but if not lemme know, can submit a quick followup. |
||
// reset the test context with the right shuffle service config | ||
afterEach() | ||
wypoon marked this conversation as resolved. (Show resolved / Hide resolved)
|
||
val conf = new SparkConf() | ||
conf.set(config.SHUFFLE_SERVICE_ENABLED.key, "true") | ||
init(conf) | ||
|
||
val shuffleMapRdd = new MyRDD(sc, 3, Nil) | ||
val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(3)) | ||
val shuffleId = shuffleDep.shuffleId | ||
val reduceRdd = new MyRDD(sc, 3, List(shuffleDep), tracker = mapOutputTracker) | ||
|
||
submit(reduceRdd, Array(0, 1, 2)) | ||
// Map stage completes successfully, | ||
// two tasks are run on an executor on hostA and one on an executor on hostB | ||
completeShuffleMapStageSuccessfully(0, 0, 3, Seq("hostA", "hostA", "hostB")) | ||
// Now the executor on hostA is lost | ||
runEvent(ExecutorLost("hostA-exec", ExecutorExited(-100, false, "Container marked as failed"))) | ||
// Executor is removed but shuffle files are not unregistered | ||
verify(blockManagerMaster, times(1)).removeExecutor("hostA-exec") | ||
verify(mapOutputTracker, times(0)).removeOutputsOnExecutor("hostA-exec") | ||
|
||
// The MapOutputTracker has all the shuffle files | ||
val mapStatuses = mapOutputTracker.shuffleStatuses(shuffleId).mapStatuses | ||
assert(mapStatuses.count(_ != null) === 3) | ||
assert(mapStatuses.count(s => s != null && s.location.executorId == "hostA-exec") === 2) | ||
assert(mapStatuses.count(s => s != null && s.location.executorId == "hostB-exec") === 1) | ||
|
||
// Now a fetch failure from the lost executor occurs | ||
complete(taskSets(1), Seq( | ||
(FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null) | ||
)) | ||
// blockManagerMaster.removeExecutor is not called again | ||
// but shuffle files are unregistered | ||
verify(blockManagerMaster, times(1)).removeExecutor("hostA-exec") | ||
verify(mapOutputTracker, times(1)).removeOutputsOnExecutor("hostA-exec") | ||
|
||
// Shuffle files for hostA-exec should be lost | ||
assert(mapStatuses.count(_ != null) === 1) | ||
assert(mapStatuses.count(s => s != null && s.location.executorId == "hostA-exec") === 0) | ||
assert(mapStatuses.count(s => s != null && s.location.executorId == "hostB-exec") === 1) | ||
|
||
// Additional fetch failure from the executor does not result in further call to | ||
// mapOutputTracker.removeOutputsOnExecutor | ||
complete(taskSets(1), Seq( | ||
(FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 1, 0, "ignored"), null) | ||
)) | ||
verify(mapOutputTracker, times(1)).removeOutputsOnExecutor("hostA-exec") | ||
} | ||
|
||
test("zero split job") { | ||
var numResults = 0 | ||
var failureReason: Option[Exception] = None | ||
|
@@ -765,8 +827,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi | |
complete(taskSets(1), Seq( | ||
(Success, 42), | ||
(FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null))) | ||
// this will get called | ||
// blockManagerMaster.removeExecutor("hostA-exec") | ||
verify(blockManagerMaster, times(1)).removeExecutor("hostA-exec") | ||
// ask the scheduler to try it again | ||
scheduler.resubmitFailedStages() | ||
// have the 2nd attempt pass | ||
|
@@ -806,11 +867,14 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi | |
submit(reduceRdd, Array(0)) | ||
completeShuffleMapStageSuccessfully(0, 0, 1) | ||
runEvent(ExecutorLost("hostA-exec", event)) | ||
verify(blockManagerMaster, times(1)).removeExecutor("hostA-exec") | ||
if (expectFileLoss) { | ||
verify(mapOutputTracker, times(1)).removeOutputsOnExecutor("hostA-exec") | ||
intercept[MetadataFetchFailedException] { | ||
mapOutputTracker.getMapSizesByExecutorId(shuffleId, 0) | ||
} | ||
} else { | ||
verify(mapOutputTracker, times(0)).removeOutputsOnExecutor("hostA-exec") | ||
assert(mapOutputTracker.getMapSizesByExecutorId(shuffleId, 0).map(_._1).toSet === | ||
HashSet(makeBlockManagerId("hostA"), makeBlockManagerId("hostB"))) | ||
} | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Reviewer comment — nit: `;` -> `,`
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Author reply: I deliberately used a semicolon; it was not a typo.