Skip to content

Commit

Permalink
[SPARK-4525] Mesos should decline unused offers
Browse files Browse the repository at this point in the history
Functionally, this is just a small change on top of #3393 (by jongyoul). The issue being addressed is discussed in the comments there. I have not yet added a test for the bug there. I will add one shortly.

I've also done some minor renaming/clean-up of variables in this class and tests.

Author: Patrick Wendell <pwendell@gmail.com>
Author: Jongyoul Lee <jongyoul@gmail.com>

Closes #3436 from pwendell/mesos-issue and squashes the following commits:

58c35b5 [Patrick Wendell] Adding unit test for this situation
c4f0697 [Patrick Wendell] Additional clean-up and fixes on top of existing fix
f20f1b3 [Jongyoul Lee] [SPARK-4525] MesosSchedulerBackend.resourceOffers cannot decline unused offers from acceptedOffers - Added code for declining unused offers among acceptedOffers - Edited testCase for checking declining unused offers
  • Loading branch information
pwendell committed Nov 25, 2014
1 parent d24d5bf commit b043c27
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -208,22 +208,25 @@ private[spark] class MesosSchedulerBackend(
*/
override def resourceOffers(d: SchedulerDriver, offers: JList[Offer]) {
inClassLoader() {
val (acceptedOffers, declinedOffers) = offers.partition { o =>
// Fail-fast on offers we know will be rejected
val (usableOffers, unUsableOffers) = offers.partition { o =>
val mem = getResource(o.getResourcesList, "mem")
val cpus = getResource(o.getResourcesList, "cpus")
val slaveId = o.getSlaveId.getValue
// TODO(pwendell): Should below be 1 + scheduler.CPUS_PER_TASK?
(mem >= MemoryUtils.calculateTotalMemory(sc) &&
// need at least 1 for executor, 1 for task
cpus >= 2 * scheduler.CPUS_PER_TASK) ||
(slaveIdsWithExecutors.contains(slaveId) &&
cpus >= scheduler.CPUS_PER_TASK)
}

val offerableWorkers = acceptedOffers.map { o =>
val workerOffers = usableOffers.map { o =>
val cpus = if (slaveIdsWithExecutors.contains(o.getSlaveId.getValue)) {
getResource(o.getResourcesList, "cpus").toInt
} else {
// If the executor doesn't exist yet, subtract CPU for executor
// TODO(pwendell): Should below just subtract "1"?
getResource(o.getResourcesList, "cpus").toInt -
scheduler.CPUS_PER_TASK
}
Expand All @@ -233,17 +236,20 @@ private[spark] class MesosSchedulerBackend(
cpus)
}

val slaveIdToOffer = acceptedOffers.map(o => o.getSlaveId.getValue -> o).toMap
val slaveIdToOffer = usableOffers.map(o => o.getSlaveId.getValue -> o).toMap

val mesosTasks = new HashMap[String, JArrayList[MesosTaskInfo]]

val slavesIdsOfAcceptedOffers = HashSet[String]()

// Call into the TaskSchedulerImpl
scheduler.resourceOffers(offerableWorkers)
.filter(!_.isEmpty)
val acceptedOffers = scheduler.resourceOffers(workerOffers).filter(!_.isEmpty)
acceptedOffers
.foreach { offer =>
offer.foreach { taskDesc =>
val slaveId = taskDesc.executorId
slaveIdsWithExecutors += slaveId
slavesIdsOfAcceptedOffers += slaveId
taskIdToSlaveId(taskDesc.taskId) = slaveId
mesosTasks.getOrElseUpdate(slaveId, new JArrayList[MesosTaskInfo])
.add(createMesosTask(taskDesc, slaveId))
Expand All @@ -257,7 +263,14 @@ private[spark] class MesosSchedulerBackend(
d.launchTasks(Collections.singleton(slaveIdToOffer(slaveId).getId), tasks, filters)
}

declinedOffers.foreach(o => d.declineOffer(o.getId))
// Decline offers that weren't used
// NOTE: This logic assumes that we only get a single offer for each host in a given batch
for (o <- usableOffers if !slavesIdsOfAcceptedOffers.contains(o.getSlaveId.getValue)) {
d.declineOffer(o.getId)
}

// Decline offers we ruled out immediately
unUsableOffers.foreach(o => d.declineOffer(o.getId))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ import java.nio.ByteBuffer
import java.util.Collections
import java.util
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

class MesosSchedulerBackendSuite extends FunSuite with LocalSparkContext with EasyMockSugar {
test("mesos resource offer is launching tasks") {

test("mesos resource offers result in launching tasks") {
def createOffer(id: Int, mem: Int, cpu: Int) = {
val builder = Offer.newBuilder()
builder.addResourcesBuilder()
Expand All @@ -43,52 +45,81 @@ class MesosSchedulerBackendSuite extends FunSuite with LocalSparkContext with Ea
.setName("cpus")
.setType(Value.Type.SCALAR)
.setScalar(Scalar.newBuilder().setValue(cpu))
builder.setId(OfferID.newBuilder().setValue(id.toString).build()).setFrameworkId(FrameworkID.newBuilder().setValue("f1"))
.setSlaveId(SlaveID.newBuilder().setValue("s1")).setHostname("localhost").build()
builder.setId(OfferID.newBuilder().setValue(s"o${id.toString}").build()).setFrameworkId(FrameworkID.newBuilder().setValue("f1"))
.setSlaveId(SlaveID.newBuilder().setValue(s"s${id.toString}")).setHostname(s"host${id.toString}").build()
}

val driver = EasyMock.createMock(classOf[SchedulerDriver])
val taskScheduler = EasyMock.createMock(classOf[TaskSchedulerImpl])

val sc = EasyMock.createMock(classOf[SparkContext])

EasyMock.expect(sc.executorMemory).andReturn(100).anyTimes()
EasyMock.expect(sc.getSparkHome()).andReturn(Option("/path")).anyTimes()
EasyMock.expect(sc.executorEnvs).andReturn(new mutable.HashMap).anyTimes()
EasyMock.expect(sc.conf).andReturn(new SparkConf).anyTimes()
EasyMock.replay(sc)

val minMem = MemoryUtils.calculateTotalMemory(sc).toInt
val minCpu = 4
val offers = new java.util.ArrayList[Offer]
offers.add(createOffer(1, minMem, minCpu))
offers.add(createOffer(1, minMem - 1, minCpu))

val mesosOffers = new java.util.ArrayList[Offer]
mesosOffers.add(createOffer(1, minMem, minCpu))
mesosOffers.add(createOffer(2, minMem - 1, minCpu))
mesosOffers.add(createOffer(3, minMem, minCpu))

val backend = new MesosSchedulerBackend(taskScheduler, sc, "master")
val workerOffers = Seq(offers.get(0)).map(o => new WorkerOffer(
o.getSlaveId.getValue,
o.getHostname,

val expectedWorkerOffers = new ArrayBuffer[WorkerOffer](2)
expectedWorkerOffers.append(new WorkerOffer(
mesosOffers.get(0).getSlaveId.getValue,
mesosOffers.get(0).getHostname,
2
))
expectedWorkerOffers.append(new WorkerOffer(
mesosOffers.get(2).getSlaveId.getValue,
mesosOffers.get(2).getHostname,
2
))
val taskDesc = new TaskDescription(1L, "s1", "n1", 0, ByteBuffer.wrap(new Array[Byte](0)))
EasyMock.expect(taskScheduler.resourceOffers(EasyMock.eq(workerOffers))).andReturn(Seq(Seq(taskDesc)))
EasyMock.expect(taskScheduler.resourceOffers(EasyMock.eq(expectedWorkerOffers))).andReturn(Seq(Seq(taskDesc)))
EasyMock.expect(taskScheduler.CPUS_PER_TASK).andReturn(2).anyTimes()
EasyMock.replay(taskScheduler)

val capture = new Capture[util.Collection[TaskInfo]]
EasyMock.expect(
driver.launchTasks(
EasyMock.eq(Collections.singleton(offers.get(0).getId)),
EasyMock.eq(Collections.singleton(mesosOffers.get(0).getId)),
EasyMock.capture(capture),
EasyMock.anyObject(classOf[Filters])
)
).andReturn(Status.valueOf(1))
EasyMock.expect(driver.declineOffer(offers.get(1).getId)).andReturn(Status.valueOf(1))
).andReturn(Status.valueOf(1)).once
EasyMock.expect(driver.declineOffer(mesosOffers.get(1).getId)).andReturn(Status.valueOf(1)).times(1)
EasyMock.expect(driver.declineOffer(mesosOffers.get(2).getId)).andReturn(Status.valueOf(1)).times(1)
EasyMock.replay(driver)
backend.resourceOffers(driver, offers)

backend.resourceOffers(driver, mesosOffers)

EasyMock.verify(driver)
assert(capture.getValue.size() == 1)
val taskInfo = capture.getValue.iterator().next()
assert(taskInfo.getName.equals("n1"))
val cpus = taskInfo.getResourcesList.get(0)
assert(cpus.getName.equals("cpus"))
assert(cpus.getScalar.getValue.equals(2.0))
assert(taskInfo.getSlaveId.getValue.equals("s1"))

// Unwanted resources offered on an existing node. Make sure they are declined
val mesosOffers2 = new java.util.ArrayList[Offer]
mesosOffers2.add(createOffer(1, minMem, minCpu))
EasyMock.reset(taskScheduler)
EasyMock.reset(driver)
EasyMock.expect(taskScheduler.resourceOffers(EasyMock.anyObject(classOf[Seq[WorkerOffer]])).andReturn(Seq(Seq())))
EasyMock.expect(taskScheduler.CPUS_PER_TASK).andReturn(2).anyTimes()
EasyMock.replay(taskScheduler)
EasyMock.expect(driver.declineOffer(mesosOffers2.get(0).getId)).andReturn(Status.valueOf(1)).times(1)
EasyMock.replay(driver)

backend.resourceOffers(driver, mesosOffers2)
EasyMock.verify(driver)
}
}

0 comments on commit b043c27

Please sign in to comment.