Slightly Smarter Task Shuffling #2057
Merged

Commits (the diff below shows changes from 13 of the 20 commits):
4f7caa7  shuffle lowest memory utilization tasks instead of highest
7cfd013  modify unit test to reflect new shuffling logic
f21141a  slight refactor, postpone larger refactor
99b1770  raw refactor of shuffle logic
48e12a0  cleanup shuffle refactor, wire into SingularityUsagePoller
bfcae86  method consistency for CPU/memory shuffles
e5fbb6b  resolve checkstyle violations
54ef4bc  uniform action id per task shuffle
48c385f  minimal viable shuffle unit tests
0b9eafa  fix off by one error in shuffle task count check
1881eb8  actually count initial # of shuffling tasks per host
2d6dae8  correctly update overusage, attempt to ensure that slave reaches reso…
c3ebb94  rearrange helper test methods
5c03641  fix bug in initial CPU utilization calculation + memory shuffle unit …
0e344c8  more thorough shuffle unit test
335c3c0  attempts to use helper methods in shuffle unit testing have failed
7a5ba4f  memory shuffle should take into account external memory pressure
52d3dca  add slightly more logging if shuffling >= 1 slave
3745584  sanity check logging, remove after sanity regained
40d3b59  move shuffle debug logging
343 changes: 343 additions & 0 deletions
343
...arityService/src/main/java/com/hubspot/singularity/scheduler/SingularityTaskShuffler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,343 @@ | ||
package com.hubspot.singularity.scheduler;

import com.google.inject.Inject;
import com.hubspot.singularity.SingularityPendingRequest;
import com.hubspot.singularity.SingularityPendingRequest.PendingType;
import com.hubspot.singularity.SingularitySlaveUsage;
import com.hubspot.singularity.SingularityTaskCleanup;
import com.hubspot.singularity.TaskCleanupType;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.RequestManager;
import com.hubspot.singularity.data.TaskManager;
import com.hubspot.singularity.scheduler.SingularityTaskShuffler.OverusedResource.Type;
import io.dropwizard.util.SizeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;

public class SingularityTaskShuffler {
  private static final Logger LOG = LoggerFactory.getLogger(SingularityTaskShuffler.class);

  private final SingularityConfiguration configuration;
  private final RequestManager requestManager;
  private final TaskManager taskManager;

  @Inject
  SingularityTaskShuffler(SingularityConfiguration configuration,
                          RequestManager requestManager,
                          TaskManager taskManager) {
    this.configuration = configuration;
    this.requestManager = requestManager;
    this.taskManager = taskManager;
  }

  static class OverusedResource {
    enum Type { MEMORY, CPU }

    double usage;
    double total;
    Type resourceType;

    OverusedResource(double usage, double total, Type resourceType) {
      this.usage = usage;
      this.total = total;
      this.resourceType = resourceType;
    }

    public double getOverusageRatio() {
      return usage / total;
    }

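    // Memory-overused slaves are shuffled before CPU-overused slaves; within the same
    // resource type, the slave with the higher overusage ratio sorts first.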
    public static int prioritize(OverusedResource r1, OverusedResource r2) {
      if (r1.resourceType == r2.resourceType) {
        return Double.compare(r2.getOverusageRatio(), r1.getOverusageRatio());
      } else if (r1.resourceType == Type.MEMORY) {
        return -1;
      } else {
        return 1;
      }
    }

    public double match(double cpuUsage, double memUsageBytes) {
      if (resourceType == Type.CPU) {
        return cpuUsage;
      } else {
        return memUsageBytes;
      }
    }

    public boolean exceeds(double cpuUsage, double memUsageBytes) {
      return usage > match(cpuUsage, memUsageBytes);
    }

    public void updateOverusage(double cpuUsageDelta, double memUsageDelta) {
      usage -= match(cpuUsageDelta, memUsageDelta);
    }

    public TaskCleanupType toTaskCleanupType() {
      if (resourceType == Type.CPU) {
        return TaskCleanupType.REBALANCE_CPU_USAGE;
      } else {
        return TaskCleanupType.REBALANCE_MEMORY_USAGE;
      }
    }
  }

  static class OverusedSlave {
    SingularitySlaveUsage usage;
    List<TaskIdWithUsage> tasks;
    OverusedResource resource;

    OverusedSlave(SingularitySlaveUsage usage, List<TaskIdWithUsage> tasks, OverusedResource resource) {
      this.usage = usage;
      this.tasks = tasks;
      this.resource = resource;
    }
  }

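  /**
   * For each overloaded host, schedules task bounces until the host's most overused
   * resource is back under its target or a per-host / cluster-wide shuffle limit is hit.
   */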
  public void shuffle(Map<SingularitySlaveUsage, List<TaskIdWithUsage>> overloadedHosts) {
    List<OverusedSlave> slavesToShuffle = overloadedHosts.entrySet().stream()
        .map((entry) -> new OverusedSlave(entry.getKey(), entry.getValue(), getMostOverusedResource(entry.getKey())))
        .sorted((s1, s2) -> OverusedResource.prioritize(s1.resource, s2.resource))
        .collect(Collectors.toList());

    List<SingularityTaskCleanup> shufflingTasks = getShufflingTasks();
    Set<String> shufflingRequests = getAssociatedRequestIds(shufflingTasks);
    Map<String, Long> shufflingTasksPerHost = getShufflingTaskCountPerHost(shufflingTasks);
    long shufflingTasksOnCluster = shufflingTasks.size();

    for (OverusedSlave slave : slavesToShuffle) {
      if (shufflingTasksOnCluster >= configuration.getMaxTasksToShuffleTotal()) {
        LOG.debug("Not shuffling any more tasks (totalShuffleCleanups: {})", shufflingTasksOnCluster);
        break;
      }

      TaskCleanupType shuffleCleanupType = slave.resource.toTaskCleanupType();
      List<TaskIdWithUsage> shuffleCandidates = getPrioritizedShuffleCandidates(slave);

      long shufflingTasksOnSlave = shufflingTasksPerHost.getOrDefault(getHostId(slave).orElse(""), 0L);
      long availableTasksOnSlave = shuffleCandidates.size();
      double cpuUsage = getSystemLoadForShuffle(slave.usage);
      double memUsageBytes = slave.usage.getMemoryBytesUsed();

      for (TaskIdWithUsage task : shuffleCandidates) {
        availableTasksOnSlave--;

        if (shufflingRequests.contains(task.getTaskId().getRequestId())) {
          LOG.debug("Request {} already has a shuffling task, skipping", task.getTaskId().getRequestId());
          continue;
        }

        boolean resourceNotOverused = !isOverutilized(slave, cpuUsage, memUsageBytes);
        boolean tooManyShufflingTasks = isShufflingTooManyTasks(shufflingTasksOnSlave, shufflingTasksOnCluster);
        double taskCpuUsage = task.getUsage().getCpusUsed();
        double taskMemUsage = task.getUsage().getMemoryTotalBytes();

        if (resourceNotOverused || tooManyShufflingTasks) {
          LOG.debug("Not shuffling any more tasks on slave {} ({} overage : {}%, shuffledOnHost: {}, totalShuffleCleanups: {})",
              task.getTaskId().getSanitizedHost(),
              slave.resource.resourceType,
              slave.resource.getOverusageRatio() * 100,
              shufflingTasksOnSlave,
              shufflingTasksOnCluster
          );
          break;
        }

        long availableShufflesOnSlave = configuration.getMaxTasksToShufflePerHost() - shufflingTasksOnSlave;
        if (availableShufflesOnSlave == 1 && availableTasksOnSlave > 0 && slave.resource.exceeds(taskCpuUsage, taskMemUsage)) {
          LOG.debug("Skipping shuffling task {} on slave {} to reach threshold ({} overage : {}%, shuffledOnHost: {}, totalShuffleCleanups: {})",
              task.getTaskId().getId(),
              task.getTaskId().getSanitizedHost(),
              slave.resource.resourceType,
              slave.resource.getOverusageRatio() * 100,
              shufflingTasksOnSlave,
              shufflingTasksOnCluster
          );
          continue;
        }

        String message = getShuffleMessage(slave, task, cpuUsage, memUsageBytes);
        bounce(task, shuffleCleanupType, Optional.of(message));

        cpuUsage -= taskCpuUsage;
        memUsageBytes -= taskMemUsage;
        slave.resource.updateOverusage(taskCpuUsage, taskMemUsage);

        shufflingTasksOnSlave++;
        shufflingTasksOnCluster++;
        shufflingRequests.add(task.getTaskId().getRequestId());
      }
    }
  }

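  // CPU shuffles move the most CPU-hungry tasks first. Memory shuffles instead move the
  // tasks with the lowest combined utilization score first, on the theory that smaller
  // tasks are easier to place on less busy hosts.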
  private List<TaskIdWithUsage> getPrioritizedShuffleCandidates(OverusedSlave slave) {
    // SingularityUsageHelper ensures that requests flagged as always ineligible for shuffling have already been filtered out.
    List<TaskIdWithUsage> out = slave.tasks;

    if (slave.resource.resourceType == Type.CPU) {
      out.sort((u1, u2) -> Double.compare(
          getUtilizationScoreForCPUShuffle(u2),
          getUtilizationScoreForCPUShuffle(u1)
      ));
    } else {
      out.sort((u1, u2) -> Double.compare(
          getUtilizationScoreForMemoryShuffle(u1),
          getUtilizationScoreForMemoryShuffle(u2)
      ));
    }

    return out;
  }

  private double getUtilizationScoreForCPUShuffle(TaskIdWithUsage task) {
    return task.getUsage().getCpusUsed() / task.getRequestedResources().getCpus();
  }

  private double getUtilizationScoreForMemoryShuffle(TaskIdWithUsage task) {
    // Requested memory is in MiB while measured usage is in bytes; convert so the ratio is unitless.
    double memoryUtilization = (double) task.getUsage().getMemoryTotalBytes() / SizeUnit.MEGABYTES.toBytes((long) task.getRequestedResources().getMemoryMb());
    double cpuUtilization = task.getUsage().getCpusUsed() / task.getRequestedResources().getCpus();

    return (memoryUtilization + cpuUtilization) / 2;
  }

  private boolean isOverutilized(OverusedSlave slave, double cpuUsage, double memUsageBytes) {
    if (slave.resource.resourceType == Type.CPU) {
      return cpuUsage > slave.usage.getSystemCpusTotal();
    } else {
      return memUsageBytes > getTargetMemoryUtilizationForHost(slave.usage);
    }
  }

  private boolean isShufflingTooManyTasks(long shuffledOnSlave, long shuffledOnCluster) {
    return shuffledOnSlave >= configuration.getMaxTasksToShufflePerHost()
        || shuffledOnCluster >= configuration.getMaxTasksToShuffleTotal();
  }

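  // The task cleanup and the pending bounce request share a single action id so that
  // both halves of one shuffle can be correlated.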
  private void bounce(TaskIdWithUsage task, TaskCleanupType cleanupType, Optional<String> message) {
    String actionId = UUID.randomUUID().toString();

    taskManager.createTaskCleanup(new SingularityTaskCleanup(
        Optional.empty(),
        cleanupType,
        System.currentTimeMillis(),
        task.getTaskId(),
        message,
        Optional.of(actionId),
        Optional.empty(),
        Optional.empty()
    ));

    requestManager.addToPendingQueue(new SingularityPendingRequest(
        task.getTaskId().getRequestId(),
        task.getTaskId().getDeployId(),
        System.currentTimeMillis(),
        Optional.empty(),
        PendingType.TASK_BOUNCE,
        Optional.empty(),
        Optional.empty(),
        Optional.empty(),
        message,
        Optional.of(actionId)
    ));
  }

  private String getShuffleMessage(OverusedSlave slave, TaskIdWithUsage task, double cpuUsage, double memUsageBytes) {
    if (slave.resource.resourceType == Type.CPU) {
      return String.format(
          "Load on slave is %s / %s, shuffling task using %s / %s to less busy host",
          cpuUsage,
          slave.usage.getSystemCpusTotal(),
          task.getUsage().getCpusUsed(),
          task.getRequestedResources().getCpus()
      );
    } else {
      return String.format(
          "Mem usage on slave is %sMiB / %sMiB, shuffling task using %sMiB / %sMiB to less busy host",
          SizeUnit.BYTES.toMegabytes((long) memUsageBytes),
          SizeUnit.BYTES.toMegabytes((long) slave.usage.getSystemMemTotalBytes()),
          SizeUnit.BYTES.toMegabytes(task.getUsage().getMemoryTotalBytes()),
          (long) task.getRequestedResources().getMemoryMb()
      );
    }
  }

  private double getTargetMemoryUtilizationForHost(SingularitySlaveUsage usage) {
    return configuration.getShuffleTasksWhenSlaveMemoryUtilizationPercentageExceeds() * usage.getSystemMemTotalBytes();
  }

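  // Overage is normalized against the resource's target so that CPU and memory overusage
  // ratios are directly comparable when prioritizing slaves.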
  private OverusedResource getMostOverusedResource(SingularitySlaveUsage overloadedSlave) {
    double currentCpuLoad = getSystemLoadForShuffle(overloadedSlave);
    double currentMemUsageBytes = overloadedSlave.getMemoryBytesUsed();

    double targetCpuUsage = overloadedSlave.getSystemCpusTotal();
    double cpuOverage = currentCpuLoad - targetCpuUsage;
    double cpuOverusage = cpuOverage / targetCpuUsage;

    double targetMemUsageBytes = getTargetMemoryUtilizationForHost(overloadedSlave);
    double memOverageBytes = currentMemUsageBytes - targetMemUsageBytes;
    double memOverusage = memOverageBytes / targetMemUsageBytes;

    if (cpuOverusage > memOverusage) {
      return new OverusedResource(cpuOverage, targetCpuUsage, Type.CPU);
    } else {
      return new OverusedResource(memOverageBytes, targetMemUsageBytes, Type.MEMORY);
    }
  }

  private List<SingularityTaskCleanup> getShufflingTasks() {
    return taskManager.getCleanupTasks()
        .stream()
        .filter(SingularityTaskShuffler::isShuffleCleanup)
        .collect(Collectors.toList());
  }

  private Map<String, Long> getShufflingTaskCountPerHost(List<SingularityTaskCleanup> shufflingTasks) {
    Map<String, Long> out = new HashMap<>();

    for (SingularityTaskCleanup c : shufflingTasks) {
      String host = c.getTaskId().getSanitizedHost();
      // merge() inserts a count of 1 for a host's first shuffling task and increments
      // thereafter; Map.replace() would be a no-op for hosts not yet in the map.
      out.merge(host, 1L, Long::sum);
    }

    return out;
  }

  private Optional<String> getHostId(OverusedSlave slave) {
    if (slave.tasks.isEmpty()) {
      return Optional.empty();
    }

    // probably should add slave metadata to SingularitySlaveUsage
    return Optional.of(slave.tasks.get(0).getTaskId().getSanitizedHost());
  }

  private static boolean isShuffleCleanup(SingularityTaskCleanup cleanup) {
    TaskCleanupType type = cleanup.getCleanupType();
    return type == TaskCleanupType.REBALANCE_CPU_USAGE || type == TaskCleanupType.REBALANCE_MEMORY_USAGE;
  }

  private Set<String> getAssociatedRequestIds(List<SingularityTaskCleanup> cleanups) {
    return cleanups.stream()
        .map((taskCleanup) -> taskCleanup.getTaskId().getRequestId())
        .collect(Collectors.toSet());
  }

  private double getSystemLoadForShuffle(SingularitySlaveUsage usage) {
    switch (configuration.getMesosConfiguration().getScoreUsingSystemLoad()) {
      case LOAD_1:
        return usage.getSystemLoad1Min();
      case LOAD_5:
        return usage.getSystemLoad5Min();
      case LOAD_15:
      default:
        return usage.getSystemLoad15Min();
    }
  }
}
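For orientation, one of the commits above wires this class into SingularityUsagePoller. Below is a minimal, hypothetical sketch of such a call site; the poller class shape and the collectOverloadedHosts() helper are assumptions for illustration and do not appear in this diff.

// Hypothetical call site, for illustration only: every name here other than
// SingularityTaskShuffler.shuffle() is assumed, not taken from this diff.
package com.hubspot.singularity.scheduler;

import java.util.List;
import java.util.Map;

import com.hubspot.singularity.SingularitySlaveUsage;

public class UsagePollerSketch {
  private final SingularityTaskShuffler taskShuffler;

  UsagePollerSketch(SingularityTaskShuffler taskShuffler) {
    this.taskShuffler = taskShuffler;
  }

  // Assumed helper: gathers a usage snapshot per slave plus the shuffle-eligible
  // tasks on it (with always-ineligible requests already filtered out).
  private Map<SingularitySlaveUsage, List<TaskIdWithUsage>> collectOverloadedHosts() {
    throw new UnsupportedOperationException("illustrative stub");
  }

  public void runShuffleCycle() {
    Map<SingularitySlaveUsage, List<TaskIdWithUsage>> overloadedHosts = collectOverloadedHosts();
    if (!overloadedHosts.isEmpty()) {
      // One batch call per polling cycle; the shuffler enforces the per-host and
      // cluster-wide shuffle limits internally.
      taskShuffler.shuffle(overloadedHosts);
    }
  }
}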
Review comment:

Still thinking more about the metric for this. You were correct to point out that, by shuffling more (and smaller) tasks, we may well hit the shuffling limits more often. I'm not certain it will happen often enough to matter at the size of our cluster, but there are two things I could see us doing to compensate: