Skip to content

Commit

Permalink
HDDS-10358. Allow Container Balancer tuning options to be set in CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
sarvekshayr committed Feb 20, 2024
1 parent c8e6cab commit f53937f
Show file tree
Hide file tree
Showing 11 changed files with 232 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,13 @@ StartContainerBalancerResponseProto startContainerBalancer(
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
Optional<Long> maxSizeEnteringTargetInGB,
Optional<Long> maxSizeLeavingSourceInGB) throws IOException;
Optional<Long> maxSizeLeavingSourceInGB,
Optional<Long> balancingInterval,
Optional<Long> moveTimeout,
Optional<Long> moveReplicationTimeout,
Optional<Boolean> networkTopologyEnable,
Optional<String> includeNodes,
Optional<String> excludeNodes) throws IOException;

/**
* Stop ContainerBalancer.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,13 @@ StartContainerBalancerResponseProto startContainerBalancer(
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
Optional<Long> maxSizeEnteringTargetInGB,
Optional<Long> maxSizeLeavingSourceInGB) throws IOException;
Optional<Long> maxSizeLeavingSourceInGB,
Optional<Long> balancingInterval,
Optional<Long> moveTimeout,
Optional<Long> moveReplicationTimeout,
Optional<Boolean> networkTopologyEnable,
Optional<String> includeNodes,
Optional<String> excludeNodes) throws IOException;

/**
* Stop ContainerBalancer.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,13 @@ public StartContainerBalancerResponseProto startContainerBalancer(
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
Optional<Long> maxSizeEnteringTargetInGB,
Optional<Long> maxSizeLeavingSourceInGB) throws IOException {
Optional<Long> maxSizeLeavingSourceInGB,
Optional<Long> balancingInterval,
Optional<Long> moveTimeout,
Optional<Long> moveReplicationTimeout,
Optional<Boolean> networkTopologyEnable,
Optional<String> includeNodes,
Optional<String> excludeNodes) throws IOException {
StartContainerBalancerRequestProto.Builder builder =
StartContainerBalancerRequestProto.newBuilder();
builder.setTraceID(TracingUtil.exportCurrentSpan());
Expand Down Expand Up @@ -952,6 +958,48 @@ public StartContainerBalancerResponseProto startContainerBalancer(
builder.setMaxSizeLeavingSourceInGB(msls);
}

if (balancingInterval.isPresent()) {
long bi = balancingInterval.get();
Preconditions.checkState(bi > 0,
"balancingInterval must be greater than zero.");
builder.setBalancingInterval(bi);
}

if (moveTimeout.isPresent()) {
long mt = moveTimeout.get();
Preconditions.checkState(mt > 0,
"moveTimeout must be greater than zero.");
builder.setMoveTimeout(mt);
}

// Only populate the request field when the caller supplied a value, and
// reject non-positive values before the request is built. The message names
// moveReplicationTimeout so a failure points at the right option.
if (moveReplicationTimeout.isPresent()) {
  long mrt = moveReplicationTimeout.get();
  Preconditions.checkState(mrt > 0,
      "moveReplicationTimeout must be greater than zero.");
  builder.setMoveReplicationTimeout(mrt);
}

if (networkTopologyEnable.isPresent()) {
Boolean nt = networkTopologyEnable.get();
Preconditions.checkState(nt != null,
"networkTopologyEnable must be either true or false");
builder.setNetworkTopologyEnable(nt);
}

if (includeNodes.isPresent()) {
String in = includeNodes.get();
Preconditions.checkState(in != null,
"includeNodes must contain comma separated hostnames or ip addresses");
builder.setIncludeNodes(in);
}

if (excludeNodes.isPresent()) {
String ex = excludeNodes.get();
Preconditions.checkState(ex != null,
"excludeNodes must contain comma separated hostnames or ip addresses");
builder.setExcludeNodes(ex);
}

StartContainerBalancerRequestProto request = builder.build();
return submitRequest(Type.StartContainerBalancer,
builder1 -> builder1.setStartContainerBalancerRequest(request))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,12 @@ message StartContainerBalancerRequestProto {
optional int64 maxSizeLeavingSourceInGB = 7;
optional int32 maxDatanodesPercentageToInvolvePerIteration = 8;
optional int32 iterations = 9;
optional int64 balancingInterval = 10;
optional int64 moveTimeout = 11;
optional int64 moveReplicationTimeout = 12;
optional bool networkTopologyEnable = 13;
optional string includeNodes = 14;
optional string excludeNodes = 15;
}

message StartContainerBalancerResponseProto {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,16 +330,16 @@ public void setMoveTimeout(Duration duration) {
this.moveTimeout = duration.toMillis();
}

public void setMoveTimeout(long millis) {
this.moveTimeout = millis;
public void setMoveTimeout(long minutes) {
this.moveTimeout = minutes * 60 * 1000;
}

public Duration getMoveReplicationTimeout() {
return Duration.ofMillis(moveReplicationTimeout);
}

public void setMoveReplicationTimeout(long millis) {
this.moveReplicationTimeout = millis;
public void setMoveReplicationTimeout(long minutes) {
this.moveReplicationTimeout = minutes * 60 * 1000;
}

public Duration getBalancingInterval() {
Expand All @@ -350,8 +350,8 @@ public void setBalancingInterval(Duration balancingInterval) {
this.balancingInterval = balancingInterval.toMillis();
}

public void setBalancingInterval(long millis) {
this.balancingInterval = millis;
public void setBalancingInterval(long minutes) {
this.balancingInterval = minutes * 60 * 1000;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
import org.apache.hadoop.hdds.utils.ProtocolMessageMetrics;
import org.apache.hadoop.ozone.ClientVersion;
import org.apache.hadoop.ozone.upgrade.UpgradeFinalizer.StatusAndMessages;
import org.apache.hadoop.thirdparty.org.checkerframework.checker.nullness.Opt;
import org.apache.hadoop.util.ProtobufUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -1107,6 +1108,12 @@ public StartContainerBalancerResponseProto startContainerBalancer(
Optional<Long> maxSizeToMovePerIterationInGB = Optional.empty();
Optional<Long> maxSizeEnteringTargetInGB = Optional.empty();
Optional<Long> maxSizeLeavingSourceInGB = Optional.empty();
Optional<Long> balancingInterval = Optional.empty();
Optional<Long> moveTimeout = Optional.empty();
Optional<Long> moveReplicationTimeout = Optional.empty();
Optional<Boolean> networkTopologyEnable = Optional.empty();
Optional<String> includeNodes = Optional.empty();
Optional<String> excludeNodes = Optional.empty();

if (request.hasThreshold()) {
threshold = Optional.of(request.getThreshold());
Expand All @@ -1132,19 +1139,47 @@ public StartContainerBalancerResponseProto startContainerBalancer(
maxSizeToMovePerIterationInGB =
Optional.of(request.getMaxSizeToMovePerIterationInGB());
}

if (request.hasMaxSizeEnteringTargetInGB()) {
maxSizeEnteringTargetInGB =
Optional.of(request.getMaxSizeEnteringTargetInGB());
}

if (request.hasMaxSizeLeavingSourceInGB()) {
maxSizeLeavingSourceInGB =
Optional.of(request.getMaxSizeLeavingSourceInGB());
}

if (request.hasBalancingInterval()) {
balancingInterval = Optional.of(request.getBalancingInterval());
}

if (request.hasMoveTimeout()) {
moveTimeout = Optional.of(request.getMoveTimeout());
}

if (request.hasMoveReplicationTimeout()) {
moveReplicationTimeout = Optional.of(request.getMoveReplicationTimeout());
}

if (request.hasNetworkTopologyEnable()) {
networkTopologyEnable = Optional.of(request.getNetworkTopologyEnable());
}

if (request.hasIncludeNodes()) {
includeNodes = Optional.of(request.getIncludeNodes());
}

if (request.hasExcludeNodes()) {
excludeNodes = Optional.of(request.getExcludeNodes());
}

return impl.startContainerBalancer(threshold, iterations,
maxDatanodesPercentageToInvolvePerIteration,
maxSizeToMovePerIterationInGB, maxSizeEnteringTargetInGB,
maxSizeLeavingSourceInGB);
maxSizeLeavingSourceInGB, balancingInterval, moveTimeout,
moveReplicationTimeout, networkTopologyEnable, includeNodes,
excludeNodes);
}

public StopContainerBalancerResponseProto stopContainerBalancer(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1047,7 +1047,13 @@ public StartContainerBalancerResponseProto startContainerBalancer(
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
Optional<Long> maxSizeEnteringTarget,
Optional<Long> maxSizeLeavingSource) throws IOException {
Optional<Long> maxSizeLeavingSource,
Optional<Long> balancingInterval,
Optional<Long> moveTimeout,
Optional<Long> moveReplicationTimeout,
Optional<Boolean> networkTopologyEnable,
Optional<String> includeNodes,
Optional<String> excludeNodes) throws IOException {
getScm().checkAdminAccess(getRemoteUser(), false);
ContainerBalancerConfiguration cbc =
scm.getConfiguration().getObject(ContainerBalancerConfiguration.class);
Expand Down Expand Up @@ -1105,6 +1111,54 @@ public StartContainerBalancerResponseProto startContainerBalancer(
cbc.setMaxSizeLeavingSource(msls * OzoneConsts.GB);
}

if (balancingInterval.isPresent()) {
long bi = balancingInterval.get();
auditMap.put("balancingInterval",String.valueOf(bi));
Preconditions.checkState(bi > 0,
"balancingInterval must be greater than zero.");
cbc.setBalancingInterval(bi);
}

if (moveTimeout.isPresent()) {
long mt = moveTimeout.get();
auditMap.put("moveTimeout", String.valueOf(mt));
Preconditions.checkState(mt > 0,
"moveTimeout must be greater than zero.");
cbc.setMoveTimeout(mt);
}

// Record the requested value in the audit map first, then validate it and
// apply it to the balancer configuration. The message names
// moveReplicationTimeout (not moveTimeout) so a failure is attributable
// to the right option.
if (moveReplicationTimeout.isPresent()) {
  long mrt = moveReplicationTimeout.get();
  auditMap.put("moveReplicationTimeout", String.valueOf(mrt));
  Preconditions.checkState(mrt > 0,
      "moveReplicationTimeout must be greater than zero.");
  cbc.setMoveReplicationTimeout(mrt);
}

if (networkTopologyEnable.isPresent()) {
Boolean nt = networkTopologyEnable.get();
auditMap.put("networkTopologyEnable", String.valueOf(nt));
Preconditions.checkState(nt != null,
"networkTopologyEnable must be either true or false");
cbc.setNetworkTopologyEnable(nt);
}

if (includeNodes.isPresent()) {
String in = includeNodes.get();
auditMap.put("includeNodes", (in));
Preconditions.checkState(in != null,
"includeNodes must contain comma separated hostnames or ip addresses");
cbc.setIncludeNodes(in);
}

// Record the requested exclude list in the audit map, then apply it to the
// balancer configuration. The message refers to excludeNodes (it previously
// named includeNodes, copied from the block above).
if (excludeNodes.isPresent()) {
  String ex = excludeNodes.get();
  auditMap.put("excludeNodes", ex);
  Preconditions.checkState(ex != null,
      "excludeNodes must contain comma separated hostnames or ip addresses");
  cbc.setExcludeNodes(ex);
}

ContainerBalancer containerBalancer = scm.getContainerBalancer();
try {
containerBalancer.startBalancer(cbc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,54 @@ public class ContainerBalancerStartSubcommand extends ScmSubcommand {
"(for example, '26' for 26GB).")
private Optional<Long> maxSizeLeavingSourceInGB;

// Optional CLI override for the pause between balancer iterations.
// The help text gives the value as a plain number, e.g. '70' for 70m.
@Option(names = {"-b", "--balancing-iteration-interval"},
    description = "The interval period between each iteration of " +
        "Container Balancer (for example, '70' for 70m).")
private Optional<Long> balancingInterval;

@Option(names = {"-mt", "--move-timeout"},
description = "The amount of time to allow a single container to move " +
"from source to target (for example, '65' for 65m).")
private Optional<Long> moveTimeout;

// Optional CLI override for the replication portion of a container move.
// The help text gives the value as a plain number, e.g. '50' for 50m.
@Option(names = {"-mrt", "--move-replication-timeout"},
    description = "The " +
        "amount of time to allow a single container's replication from source " +
        "to target as part of container move. For example, if \"hdds.container" +
        ".balancer.move.timeout\" is 65 minutes, then out of those 65 minutes " +
        "50 minutes will be the deadline for replication to complete " +
        "(for example, '50' for 50m).")
private Optional<Long> moveReplicationTimeout;

@Option(names = {"-nt", "--move-network-topology-enable"},
description = "Whether to take network topology into account when " +
"selecting a target for a source. " +
"This configuration is false by default.")
private Optional<Boolean> networkTopologyEnable;

@Option(names = {"-in", "--include-datanodes"},
description = "A list of Datanode " +
"hostnames or ip addresses separated by commas. Only the Datanodes " +
"specified in this list are balanced. This configuration is empty by " +
"default and is applicable only if it is non-empty (for example, \"hostname1,hostname2,hostname3\").")
private Optional<String> includeNodes;

@Option(names = {"-ex", "--exclude-datanodes"},
description = "A list of Datanode " +
"hostnames or ip addresses separated by commas. The Datanodes specified " +
"in this list are excluded from balancing. This configuration is empty " +
"by default (for example, \"hostname1,hostname2,hostname3\").")
private Optional<String> excludeNodes;

@Override
public void execute(ScmClient scmClient) throws IOException {
StartContainerBalancerResponseProto response = scmClient.
startContainerBalancer(threshold, iterations,
maxDatanodesPercentageToInvolvePerIteration,
maxSizeToMovePerIterationInGB, maxSizeEnteringTargetInGB,
maxSizeLeavingSourceInGB);
maxSizeLeavingSourceInGB, balancingInterval, moveTimeout,
moveReplicationTimeout, networkTopologyEnable, includeNodes,
excludeNodes);
if (response.getStart()) {
System.out.println("Container Balancer started successfully.");
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -483,12 +483,19 @@ public StartContainerBalancerResponseProto startContainerBalancer(
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
Optional<Long> maxSizeEnteringTargetInGB,
Optional<Long> maxSizeLeavingSourceInGB)
throws IOException {
Optional<Long> maxSizeLeavingSourceInGB,
Optional<Long> balancingInterval,
Optional<Long> moveTimeout,
Optional<Long> moveReplicationTimeout,
Optional<Boolean> networkTopologyEnable,
Optional<String> includeNodes,
Optional<String> excludeNodes) throws IOException {
return storageContainerLocationClient.startContainerBalancer(threshold,
iterations, maxDatanodesPercentageToInvolvePerIteration,
maxSizeToMovePerIterationInGB, maxSizeEnteringTargetInGB,
maxSizeLeavingSourceInGB);
maxSizeLeavingSourceInGB, balancingInterval, moveTimeout,
moveReplicationTimeout, networkTopologyEnable, includeNodes,
excludeNodes);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public void testContainerBalancerStartSubcommandWhenBalancerIsNotRunning()
throws IOException {
ScmClient scmClient = mock(ScmClient.class);
when(scmClient.startContainerBalancer(
null, null, null, null, null, null))
null, null, null, null, null, null, null, null, null, null, null, null))
.thenReturn(
StorageContainerLocationProtocolProtos
.StartContainerBalancerResponseProto.newBuilder()
Expand All @@ -133,7 +133,7 @@ public void testContainerBalancerStartSubcommandWhenBalancerIsRunning()
throws IOException {
ScmClient scmClient = mock(ScmClient.class);
when(scmClient.startContainerBalancer(
null, null, null, null, null, null))
null, null, null, null, null, null, null, null, null, null, null, null))
.thenReturn(StorageContainerLocationProtocolProtos
.StartContainerBalancerResponseProto.newBuilder()
.setStart(false)
Expand Down
Loading

0 comments on commit f53937f

Please sign in to comment.