-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-20464][SS] Add a job group and description for streaming queries and fix cancellation of running jobs using the job group #17765
Changes from 6 commits
07e182b
bd13a01
7a58547
6ab66e2
992d68f
915d67b
f9342c9
6e66638
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -252,6 +252,8 @@ class StreamExecution( | |
*/ | ||
private def runBatches(): Unit = { | ||
try { | ||
sparkSession.sparkContext.setJobGroup(runId.toString, getBatchDescriptionString, | ||
interruptOnCancel = true) | ||
if (sparkSession.sessionState.conf.streamingMetricsEnabled) { | ||
sparkSession.sparkContext.env.metricsSystem.registerSource(streamMetrics) | ||
} | ||
|
@@ -289,6 +291,7 @@ class StreamExecution( | |
if (currentBatchId < 0) { | ||
// We'll do this initialization only once | ||
populateStartOffsets(sparkSessionToRunBatches) | ||
sparkSession.sparkContext.setJobDescription(getBatchDescriptionString) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Update job description with correct `currentBatchId`. |
||
logDebug(s"Stream running from $committedOffsets to $availableOffsets") | ||
} else { | ||
constructNextBatch() | ||
|
@@ -308,6 +311,7 @@ class StreamExecution( | |
logDebug(s"batch ${currentBatchId} committed") | ||
// We'll increase currentBatchId after we complete processing current batch's data | ||
currentBatchId += 1 | ||
sparkSession.sparkContext.setJobDescription(getBatchDescriptionString) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Update job description with updated `currentBatchId`. |
||
} else { | ||
currentStatus = currentStatus.copy(isDataAvailable = false) | ||
updateStatusMessage("Waiting for data to arrive") | ||
|
@@ -684,8 +688,11 @@ class StreamExecution( | |
// intentionally | ||
state.set(TERMINATED) | ||
if (microBatchThread.isAlive) { | ||
sparkSession.sparkContext.cancelJobGroup(runId.toString) | ||
microBatchThread.interrupt() | ||
microBatchThread.join() | ||
// microBatchThread may spawn new jobs, so we need to cancel again to prevent a leak | ||
sparkSession.sparkContext.cancelJobGroup(runId.toString) | ||
} | ||
logInfo(s"Query $prettyIdString was stopped") | ||
} | ||
|
@@ -825,6 +832,11 @@ class StreamExecution( | |
} | ||
} | ||
|
||
private def getBatchDescriptionString: String = { | ||
val batchDescription = if (currentBatchId < 0) "init" else currentBatchId.toString | ||
Option(name).map(_ + " ").getOrElse("") + | ||
s"[batch = $batchDescription,<br/>id = $id,<br/>runId = $runId]" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would get rid of the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, updated. |
||
} | ||
} | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tdas Is this change okay? Need it to add line breaks in the job description cells.