diff --git a/SingularityBase/src/main/java/com/hubspot/singularity/SlaveMatchState.java b/SingularityBase/src/main/java/com/hubspot/singularity/SlaveMatchState.java index 32df537347..5f2bbc2e48 100644 --- a/SingularityBase/src/main/java/com/hubspot/singularity/SlaveMatchState.java +++ b/SingularityBase/src/main/java/com/hubspot/singularity/SlaveMatchState.java @@ -2,6 +2,7 @@ public enum SlaveMatchState { OK(true), + PREFERRED_SLAVE(true), NOT_RACK_OR_SLAVE_PARTICULAR(true), RESOURCES_DO_NOT_MATCH(false), RACK_SATURATED(false), diff --git a/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java b/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java index dd8ff8a9ac..e16ff22c6a 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/config/SingularityConfiguration.java @@ -399,6 +399,14 @@ public class SingularityConfiguration extends Configuration { private boolean proxyRunNowToLeader = true; + private double preferredSlaveScaleFactor = 1.5; + + // high cpu slave, based on cpu to memory ratio + private double highCpuSlaveCutOff = 1.5; //TODO + + // high memory slave, based on cpu to memory ratio + private double highMemorySlaveCutOff = 0.5; //TODO + @JsonProperty("crashLoop") private CrashLoopConfiguration crashLoopConfiguration = new CrashLoopConfiguration(); @@ -1693,6 +1701,30 @@ public void setSqlFallBackToBytesFields(boolean sqlFallBackToBytesFields) { this.sqlFallBackToBytesFields = sqlFallBackToBytesFields; } + public double getPreferredSlaveScaleFactor() { + return preferredSlaveScaleFactor; + } + + public void setPreferredSlaveScaleFactor(double preferredSlaveScaleFactor) { + this.preferredSlaveScaleFactor = preferredSlaveScaleFactor; + } + + public double getHighCpuSlaveCutOff() { + return highCpuSlaveCutOff; + } + + public void setHighCpuSlaveCutOff(double 
highCpuSlaveCutOff) { + this.highCpuSlaveCutOff = highCpuSlaveCutOff; + } + + public double getHighMemorySlaveCutOff() { + return highMemorySlaveCutOff; + } + + public void setHighMemorySlaveCutOff(double highMemorySlaveCutOff) { + this.highMemorySlaveCutOff = highMemorySlaveCutOff; + } + public CrashLoopConfiguration getCrashLoopConfiguration() { return crashLoopConfiguration; } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosOfferScheduler.java b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosOfferScheduler.java index 4b0178031a..2222d16d7a 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosOfferScheduler.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularityMesosOfferScheduler.java @@ -11,7 +11,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReentrantLock; import java.util.function.Function; import java.util.stream.Collectors; @@ -473,7 +472,7 @@ private MaxProbableUsage getMaxProbableUsageForSlave(List act if (taskId.getSanitizedHost().equals(sanitizedHostname)) { if (requestUtilizations.containsKey(taskId.getRequestId())) { RequestUtilization utilization = requestUtilizations.get(taskId.getRequestId()); - cpu += getEstimatedCpuUsageForRequest(utilization); + cpu += slaveAndRackHelper.getEstimatedCpuUsageForRequest(utilization); memBytes += utilization.getMaxMemBytesUsed(); diskBytes += utilization.getMaxDiskBytesUsed(); } else { @@ -539,11 +538,6 @@ private List getSortedDueTaskRequests() { .collect(Collectors.toList()); } - private double getEstimatedCpuUsageForRequest(RequestUtilization requestUtilization) { - // To account for cpu bursts, tend towards max usage if the app is consistently over-utilizing cpu, tend towards avg if it 
is over-utilized in short bursts - return (requestUtilization.getMaxCpuUsed() - requestUtilization.getAvgCpuUsed()) * requestUtilization.getCpuBurstRating() + requestUtilization.getAvgCpuUsed(); - } - private double score(SingularityOfferHolder offerHolder, SingularityTaskRequestHolder taskRequestHolder, Optional maybeSlaveUsage, List activeTaskIdsForRequest, RequestUtilization requestUtilization, Map> deployStatsCache) { @@ -553,7 +547,7 @@ private double score(SingularityOfferHolder offerHolder, SingularityTaskRequestH double estimatedCpusToAdd = taskRequestHolder.getTotalResources().getCpus(); if (requestUtilization != null) { - estimatedCpusToAdd = getEstimatedCpuUsageForRequest(requestUtilization); + estimatedCpusToAdd = slaveAndRackHelper.getEstimatedCpuUsageForRequest(requestUtilization); } if (mesosConfiguration.isOmitOverloadedHosts() && maybeSlaveUsage.isPresent() && maybeSlaveUsage.get().isCpuOverloaded(estimatedCpusToAdd)) { LOG.debug("Slave {} is overloaded (load5 {}/{}, load1 {}/{}, estimated cpus to add: {}, already committed cpus: {}), ignoring offer", @@ -576,10 +570,10 @@ private double score(SingularityOfferHolder offerHolder, SingularityTaskRequestH if (!matchesResources) { return 0; } - final SlaveMatchState slaveMatchState = slaveAndRackManager.doesOfferMatch(offerHolder, taskRequest, activeTaskIdsForRequest, isPreemptibleTask(taskRequest, deployStatsCache)); + final SlaveMatchState slaveMatchState = slaveAndRackManager.doesOfferMatch(offerHolder, taskRequest, activeTaskIdsForRequest, isPreemptibleTask(taskRequest, deployStatsCache), requestUtilization); if (slaveMatchState.isMatchAllowed()) { - return score(offerHolder.getHostname(), maybeSlaveUsage); + return score(offerHolder.getHostname(), maybeSlaveUsage, slaveMatchState); } else if (LOG.isTraceEnabled()) { LOG.trace("Ignoring offer on host {} with roles {} on {} for task {}; matched resources: {}, slave match state: {}", offerHolder.getHostname(), offerHolder.getRoles(), 
offerHolder.getHostname(), pendingTaskId, matchesResources, slaveMatchState); @@ -605,7 +599,7 @@ private boolean isPreemptibleTask(SingularityTaskRequest taskRequest, Map maybeSlaveUsage) { + double score(String hostname, Optional maybeSlaveUsage, SlaveMatchState slaveMatchState) { if (!maybeSlaveUsage.isPresent() || maybeSlaveUsage.get().isMissingUsageData()) { if (mesosConfiguration.isOmitForMissingUsageData()) { LOG.info("Skipping slave {} with missing usage data ({})", hostname, maybeSlaveUsage); @@ -618,11 +612,18 @@ private boolean isPreemptibleTask(SingularityTaskRequest taskRequest, Map attributes, Map attributes, Map otherAttributes) { + for (Map.Entry entry : otherAttributes.entrySet()) { + if ((attributes.containsKey(entry.getKey()) && attributes.get(entry.getKey()).equals(entry.getValue()))) { + return true; + } + } + return false; + } + + public enum CpuMemoryPreference { + AVERAGE, + HIGH_MEMORY, + HIGH_CPU + } + + public CpuMemoryPreference getCpuMemoryPreferenceForRequest(RequestUtilization requestUtilization) { + double cpuMemoryRatioForRequest = getCpuMemoryRatioForRequest(requestUtilization); + + if (cpuMemoryRatioForRequest > configuration.getHighCpuSlaveCutOff()) { + return CpuMemoryPreference.HIGH_CPU; + } else if (cpuMemoryRatioForRequest < configuration.getHighMemorySlaveCutOff()) { + return CpuMemoryPreference.HIGH_MEMORY; + } + return CpuMemoryPreference.AVERAGE; + } + + public CpuMemoryPreference getCpuMemoryPreferenceForSlave(SingularityOfferHolder offerHolder) { + double cpuMemoryRatioForSlave = getCpuMemoryRatioForSlave(offerHolder); + if (cpuMemoryRatioForSlave > configuration.getHighCpuSlaveCutOff()) { + return CpuMemoryPreference.HIGH_CPU; + } else if (cpuMemoryRatioForSlave < configuration.getHighMemorySlaveCutOff()) { + return CpuMemoryPreference.HIGH_MEMORY; + } + return CpuMemoryPreference.AVERAGE; + } + + private double getCpuMemoryRatioForSlave(SingularityOfferHolder offerHolder) { + double memory = 
MesosUtils.getMemory(offerHolder.getCurrentResources(), Optional.empty()); + double cpus = MesosUtils.getNumCpus(offerHolder.getCurrentResources(), Optional.empty()); + return cpus/memory; + } + + private double getCpuMemoryRatioForRequest(RequestUtilization requestUtilization) { + double cpuUsageForRequest = getEstimatedCpuUsageForRequest(requestUtilization); + double memUsageForRequest = requestUtilization.getAvgMemBytesUsed(); + return cpuUsageForRequest/memUsageForRequest; + } + + public double getEstimatedCpuUsageForRequest(RequestUtilization requestUtilization) { + // To account for cpu bursts, tend towards max usage if the app is consistently over-utilizing cpu, tend towards avg if it is over-utilized in short bursts + return (requestUtilization.getMaxCpuUsed() - requestUtilization.getAvgCpuUsed()) * requestUtilization.getCpuBurstRating() + requestUtilization.getAvgCpuUsed(); + } + } diff --git a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularitySlaveAndRackManager.java b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularitySlaveAndRackManager.java index 08fa857ae7..95b4034638 100644 --- a/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularitySlaveAndRackManager.java +++ b/SingularityService/src/main/java/com/hubspot/singularity/mesos/SingularitySlaveAndRackManager.java @@ -28,6 +28,7 @@ import com.hubspot.mesos.json.MesosMasterSlaveObject; import com.hubspot.mesos.json.MesosMasterStateObject; import com.hubspot.singularity.MachineState; +import com.hubspot.singularity.RequestUtilization; import com.hubspot.singularity.SingularityMachineAbstraction; import com.hubspot.singularity.SingularityPendingRequest.PendingType; import com.hubspot.singularity.SingularityPendingTask; @@ -46,6 +47,7 @@ import com.hubspot.singularity.data.RackManager; import com.hubspot.singularity.data.SlaveManager; import com.hubspot.singularity.data.TaskManager; +import 
com.hubspot.singularity.mesos.SingularitySlaveAndRackHelper.CpuMemoryPreference; import com.hubspot.singularity.scheduler.SingularityLeaderCache; import com.hubspot.singularity.sentry.SingularityExceptionNotifier; @@ -83,7 +85,7 @@ public class SingularitySlaveAndRackManager { this.leaderCache = leaderCache; } - SlaveMatchState doesOfferMatch(SingularityOfferHolder offerHolder, SingularityTaskRequest taskRequest, List activeTaskIdsForRequest, boolean isPreemptibleTask) { + SlaveMatchState doesOfferMatch(SingularityOfferHolder offerHolder, SingularityTaskRequest taskRequest, List activeTaskIdsForRequest, boolean isPreemptibleTask, RequestUtilization requestUtilization) { final String host = offerHolder.getHostname(); final String rackId = offerHolder.getRackId(); final String slaveId = offerHolder.getSlaveId(); @@ -247,9 +249,36 @@ SlaveMatchState doesOfferMatch(SingularityOfferHolder offerHolder, SingularityTa case GREEDY: } + if (isSlavePreferred(offerHolder, taskRequest, requestUtilization)) { + LOG.info("Slave {} is preferred", offerHolder.getHostname()); + return SlaveMatchState.PREFERRED_SLAVE; + } + return SlaveMatchState.OK; } + private boolean isSlavePreferred(SingularityOfferHolder offerHolder, SingularityTaskRequest taskRequest, RequestUtilization requestUtilization) { + return isSlavePreferredByAllowedAttributes(offerHolder, taskRequest) || isSlavePreferredByCpuMemory(offerHolder, requestUtilization); + } + + private boolean isSlavePreferredByAllowedAttributes(SingularityOfferHolder offerHolder, SingularityTaskRequest taskRequest) { + Map allowedAttributes = getAllowedAttributes(taskRequest); + Map hostAttributes = offerHolder.getTextAttributes(); + boolean containsAtLeastOneMatchingAttribute = slaveAndRackHelper.containsAtLeastOneMatchingAttribute(hostAttributes, allowedAttributes); + LOG.info("is slave {} by allowed attributes? 
{}", offerHolder.getHostname(), containsAtLeastOneMatchingAttribute); + return containsAtLeastOneMatchingAttribute; + } + + public boolean isSlavePreferredByCpuMemory(SingularityOfferHolder offerHolder, RequestUtilization requestUtilization) { + if (requestUtilization != null) { + CpuMemoryPreference cpuMemoryPreferenceForSlave = slaveAndRackHelper.getCpuMemoryPreferenceForSlave(offerHolder); + CpuMemoryPreference cpuMemoryPreferenceForRequest = slaveAndRackHelper.getCpuMemoryPreferenceForRequest(requestUtilization); + LOG.info("CpuMemoryPreference for slave {} is {}, CpuMemoryPreference for request {} is {}", offerHolder.getHostname(), cpuMemoryPreferenceForSlave.toString(), requestUtilization.getRequestId(), cpuMemoryPreferenceForRequest.toString()); + return cpuMemoryPreferenceForSlave == cpuMemoryPreferenceForRequest; + } + return false; + } + private boolean isSlaveAttributesMatch(SingularityOfferHolder offer, SingularityTaskRequest taskRequest, boolean isPreemptibleTask) { final Map requiredAttributes = getRequiredAttributes(taskRequest); final Map allowedAttributes = getAllowedAttributes(taskRequest); @@ -288,7 +317,6 @@ private boolean isSlaveAttributesMatch(SingularityOfferHolder offer, Singularity } return true; - } private Map getRequiredAttributes(SingularityTaskRequest taskRequest) { diff --git a/SingularityService/src/test/java/com/hubspot/singularity/mesos/SingularityMesosOfferSchedulerTest.java b/SingularityService/src/test/java/com/hubspot/singularity/mesos/SingularityMesosOfferSchedulerTest.java index 5e7cbeaf9b..da00f2fd0a 100644 --- a/SingularityService/src/test/java/com/hubspot/singularity/mesos/SingularityMesosOfferSchedulerTest.java +++ b/SingularityService/src/test/java/com/hubspot/singularity/mesos/SingularityMesosOfferSchedulerTest.java @@ -26,6 +26,7 @@ import com.hubspot.singularity.SingularityTaskId; import com.hubspot.singularity.SingularityTaskRequest; import com.hubspot.singularity.SingularityUser; +import 
com.hubspot.singularity.SlaveMatchState; import com.hubspot.singularity.api.SingularityScaleRequest; import com.hubspot.singularity.config.SingularityConfiguration; import com.hubspot.singularity.data.DeployManager; @@ -100,11 +101,11 @@ public void itCorrectlyUsesDefaults() { setRequestType(RequestType.SERVICE); // LR - no usage tracked -> default score - assertValueIs(0.50, scheduler.score(SLAVE_ID, Optional.empty())); + assertValueIs(0.50, scheduler.score(SLAVE_ID, Optional.empty(), SlaveMatchState.OK)); // NLR - no deployStatistics -> default weights setRequestType(RequestType.ON_DEMAND); - assertValueIs(0.5, scheduler.score(SLAVE_ID, Optional.of(getUsage(5, 10, 5, 5, 10, 5, 5, 10, 5)))); + assertValueIs(0.5, scheduler.score(SLAVE_ID, Optional.of(getUsage(5, 10, 5, 5, 10, 5, 5, 10, 5)), SlaveMatchState.OK)); } @Test @@ -112,35 +113,44 @@ public void itCorrectlyScoresLongRunningTasks() { setRequestType(RequestType.SERVICE); // new slave (no resources used) -> perfect score - assertValueIs(1, scheduler.score(SLAVE_ID, Optional.of(getUsage(0,10, 0, 0,10, 0, 0, 10, 0)))); + assertValueIs(1, scheduler.score(SLAVE_ID, Optional.of(getUsage(0,10, 0, 0,10, 0, 0, 10, 0)), SlaveMatchState.OK)); // cpu used, no mem used, no disk used - assertValueIs(0.85, scheduler.score(SLAVE_ID, Optional.of(getUsage(0, 10, 0, 5, 10, 5, 0, 10, 0)))); - assertValueIs(0.76, scheduler.score(SLAVE_ID, Optional.of(getUsage(0, 10, 0, 8, 10, 8, 0, 10, 0)))); + assertValueIs(0.85, scheduler.score(SLAVE_ID, Optional.of(getUsage(0, 10, 0, 5, 10, 5, 0, 10, 0)), SlaveMatchState.OK)); + assertValueIs(0.76, scheduler.score(SLAVE_ID, Optional.of(getUsage(0, 10, 0, 8, 10, 8, 0, 10, 0)), SlaveMatchState.OK)); // no cpu used, mem used, no disk used - assertValueIs(0.75, scheduler.score(SLAVE_ID, Optional.of(getUsage(5, 10, 5, 0, 10, 0, 0, 10, 0)))); - assertValueIs(0.60, scheduler.score(SLAVE_ID, Optional.of(getUsage(8, 10, 8, 0, 10, 0, 0, 10, 0)))); + assertValueIs(0.75, scheduler.score(SLAVE_ID, 
Optional.of(getUsage(5, 10, 5, 0, 10, 0, 0, 10, 0)), SlaveMatchState.OK)); + assertValueIs(0.60, scheduler.score(SLAVE_ID, Optional.of(getUsage(8, 10, 8, 0, 10, 0, 0, 10, 0)), SlaveMatchState.OK)); // no cpu used, no mem used, disk used - assertValueIs(0.90, scheduler.score(SLAVE_ID, Optional.of(getUsage(0, 10, 0, 0, 10, 0, 5, 10, 5)))); - assertValueIs(0.84, scheduler.score(SLAVE_ID, Optional.of(getUsage(0, 10, 0, 0, 10, 0, 8, 10, 8)))); + assertValueIs(0.90, scheduler.score(SLAVE_ID, Optional.of(getUsage(0, 10, 0, 0, 10, 0, 5, 10, 5)), SlaveMatchState.OK)); + assertValueIs(0.84, scheduler.score(SLAVE_ID, Optional.of(getUsage(0, 10, 0, 0, 10, 0, 8, 10, 8)), SlaveMatchState.OK)); // cpu used, mem used, no disk used - assertValueIs(0.60, scheduler.score(SLAVE_ID, Optional.of(getUsage(5, 10, 5, 5, 10, 5, 0, 10, 0)))); - assertValueIs(0.36, scheduler.score(SLAVE_ID, Optional.of(getUsage(8, 10, 8, 8, 10, 8, 0, 10, 0)))); + assertValueIs(0.60, scheduler.score(SLAVE_ID, Optional.of(getUsage(5, 10, 5, 5, 10, 5, 0, 10, 0)), SlaveMatchState.OK)); + assertValueIs(0.36, scheduler.score(SLAVE_ID, Optional.of(getUsage(8, 10, 8, 8, 10, 8, 0, 10, 0)), SlaveMatchState.OK)); // no cpu used, mem used, disk used - assertValueIs(0.65, scheduler.score(SLAVE_ID, Optional.of(getUsage(5,10, 5, 0, 10,0, 5, 10, 5)))); - assertValueIs(0.44, scheduler.score(SLAVE_ID, Optional.of(getUsage(8,10, 8, 0,10, 0, 8, 10, 8)))); + assertValueIs(0.65, scheduler.score(SLAVE_ID, Optional.of(getUsage(5,10, 5, 0, 10,0, 5, 10, 5)), SlaveMatchState.OK)); + assertValueIs(0.44, scheduler.score(SLAVE_ID, Optional.of(getUsage(8,10, 8, 0,10, 0, 8, 10, 8)), SlaveMatchState.OK)); // cpu used, no mem used, disk used - assertValueIs(0.75, scheduler.score(SLAVE_ID, Optional.of(getUsage(0,10, 0, 5,10, 5, 5, 10, 5)))); - assertValueIs(0.60, scheduler.score(SLAVE_ID, Optional.of(getUsage(0,10, 0, 8,10, 8, 8, 10, 8)))); + assertValueIs(0.75, scheduler.score(SLAVE_ID, Optional.of(getUsage(0,10, 0, 5,10, 5, 5, 10, 5)), 
SlaveMatchState.OK)); + assertValueIs(0.60, scheduler.score(SLAVE_ID, Optional.of(getUsage(0,10, 0, 8,10, 8, 8, 10, 8)), SlaveMatchState.OK)); // cpu used, mem used, disk used - assertValueIs(0.5, scheduler.score(SLAVE_ID, Optional.of(getUsage(5,10, 5, 5,10, 5, 5, 10, 5)))); - assertValueIs(0.2, scheduler.score(SLAVE_ID, Optional.of(getUsage(8,10, 8, 8,10, 8, 8, 10, 8)))); + assertValueIs(0.5, scheduler.score(SLAVE_ID, Optional.of(getUsage(5,10, 5, 5,10, 5, 5, 10, 5)), SlaveMatchState.OK)); + assertValueIs(0.2, scheduler.score(SLAVE_ID, Optional.of(getUsage(8,10, 8, 8,10, 8, 8, 10, 8)), SlaveMatchState.OK)); + } + + @Test + public void itCorrectlyScalesScoresForPreferredHosts() { + assertValueIs(0.50, scheduler.score(SLAVE_ID, Optional.of(getUsage(5,10, 5, 5,10, 5, 5, 10, 5)), SlaveMatchState.OK)); + assertValueIs(0.75, scheduler.score(SLAVE_ID, Optional.of(getUsage(5,10, 5, 5,10, 5, 5, 10, 5)), SlaveMatchState.PREFERRED_SLAVE)); + + assertValueIs(0.20, scheduler.score(SLAVE_ID, Optional.of(getUsage(8,10, 8, 8,10, 8, 8, 10, 8)), SlaveMatchState.OK)); + assertValueIs(0.30, scheduler.score(SLAVE_ID, Optional.of(getUsage(8,10, 8, 8,10, 8, 8, 10, 8)), SlaveMatchState.PREFERRED_SLAVE)); } @Test