Merge branch 'apache:master' into HDDS-11243
slfan1989 authored Nov 11, 2024
2 parents ee92e3d + 2797c45 commit 5db5824
Showing 36 changed files with 781 additions and 128 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
@@ -31,6 +31,8 @@ env:
# Minimum required Java version for running Ozone is defined in pom.xml (javac.version).
TEST_JAVA_VERSION: 17 # JDK version used by CI build and tests; should match the JDK version in apache/ozone-runner image
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
OZONE_RUNNER_IMAGE: ghcr.io/apache/ozone-runner
OZONE_RUNNER_VERSION: 20241108-jdk17-1
OZONE_WITH_COVERAGE: ${{ github.event_name == 'push' }}
jobs:
build-info:
HddsConfigKeys.java
@@ -405,4 +405,7 @@ private HddsConfigKeys() {
"hdds.datanode.slow.op.warning.threshold";
public static final String HDDS_DATANODE_SLOW_OP_WARNING_THRESHOLD_DEFAULT =
"500ms";

public static final String OZONE_DATANODE_IO_METRICS_PERCENTILES_INTERVALS_SECONDS_KEY =
"ozone.volume.io.percentiles.intervals.seconds";
}
12 changes: 12 additions & 0 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -4553,4 +4553,16 @@
maximum number of buckets across all volumes.
</description>
</property>

<property>
<name>ozone.volume.io.percentiles.intervals.seconds</name>
<value>60</value>
<tag>OZONE, DATANODE</tag>
<description>
    Specifies the interval(s), in seconds, over which percentile (quantile) latency metrics
    are computed for DataNode volume read and write operations. Multiple comma-separated
    intervals may be configured. These metrics help track the read and write performance of
    DataNodes and make it easier to identify and analyse performance issues.
</description>
</property>

</configuration>
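As an illustrative sketch (not part of this commit), the property above is read as an int array, so one or more comma-separated intervals can be supplied; each interval yields a readLatency&lt;N&gt;s / writeLatency&lt;N&gt;s quantile pair per volume. The helper below only mirrors the HddsVolume and VolumeIOStats changes shown later in this diff; its name and parameters are placeholders.

// Sketch only: how the configured intervals flow into the per-volume IO metrics.
static VolumeIOStats createVolumeIOStats(ConfigurationSource conf,
    String volumeRoot, String storageDir) {
  // "60" -> {60}; "60,300" -> {60, 300}
  int[] intervals =
      conf.getInts(OZONE_DATANODE_IO_METRICS_PERCENTILES_INTERVALS_SECONDS_KEY);
  // Each interval registers "readLatency<N>s" and "writeLatency<N>s" quantiles.
  return new VolumeIOStats(volumeRoot, storageDir, intervals);
}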
DNMXBean.java
@@ -26,4 +26,32 @@
*/
@InterfaceAudience.Private
public interface DNMXBean extends ServiceRuntimeInfo {

/**
* Gets the datanode hostname.
*
* @return the hostname of the datanode.
*/
String getHostname();

/**
* Gets the client rpc port.
*
* @return the client rpc port
*/
String getClientRpcPort();

/**
* Gets the http port.
*
* @return the http port
*/
String getHttpPort();

/**
* Gets the https port.
*
* @return the https port
*/
String getHttpsPort();
}
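These attributes are backed by DNMXBeanImpl (next in this diff) and populated by HddsDatanodeService at startup, so the datanode's hostname and ports become visible through its JMX bean alongside the existing ServiceRuntimeInfo fields.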
DNMXBeanImpl.java
@@ -25,8 +25,53 @@
* This is the JMX management class for DN information.
*/
public class DNMXBeanImpl extends ServiceRuntimeInfoImpl implements DNMXBean {
public DNMXBeanImpl(
VersionInfo versionInfo) {

private String hostName;
private String clientRpcPort;
private String httpPort;
private String httpsPort;

public DNMXBeanImpl(VersionInfo versionInfo) {
super(versionInfo);
}

@Override
public String getHostname() {
return hostName;
}

@Override
public String getClientRpcPort() {
return clientRpcPort;
}

@Override
public String getHttpPort() {
return httpPort;
}

@Override
public String getHttpsPort() {
return httpsPort;
}

public void setHttpPort(String httpPort) {
this.httpPort = httpPort;
}

public void setHostName(String hostName) {
this.hostName = hostName;
}

public void setClientRpcPort(String rpcPort) {
this.clientRpcPort = rpcPort;
}

public String getHostName() {
return hostName;
}

public void setHttpsPort(String httpsPort) {
this.httpsPort = httpsPort;
}
}
HddsDatanodeService.java
@@ -228,6 +228,7 @@ public String getNamespace() {
String ip = InetAddress.getByName(hostname).getHostAddress();
datanodeDetails = initializeDatanodeDetails();
datanodeDetails.setHostName(hostname);
serviceRuntimeInfo.setHostName(hostname);
datanodeDetails.setIpAddress(ip);
datanodeDetails.setVersion(
HddsVersionInfo.HDDS_VERSION_INFO.getVersion());
@@ -300,23 +301,30 @@ public String getNamespace() {
httpServer = new HddsDatanodeHttpServer(conf);
httpServer.start();
HttpConfig.Policy policy = HttpConfig.getHttpPolicy(conf);

if (policy.isHttpEnabled()) {
datanodeDetails.setPort(DatanodeDetails.newPort(HTTP,
httpServer.getHttpAddress().getPort()));
int httpPort = httpServer.getHttpAddress().getPort();
datanodeDetails.setPort(DatanodeDetails.newPort(HTTP, httpPort));
serviceRuntimeInfo.setHttpPort(String.valueOf(httpPort));
}

if (policy.isHttpsEnabled()) {
datanodeDetails.setPort(DatanodeDetails.newPort(HTTPS,
httpServer.getHttpsAddress().getPort()));
int httpsPort = httpServer.getHttpsAddress().getPort();
datanodeDetails.setPort(DatanodeDetails.newPort(HTTPS, httpsPort));
serviceRuntimeInfo.setHttpsPort(String.valueOf(httpsPort));
}

} catch (Exception ex) {
LOG.error("HttpServer failed to start.", ex);
}


clientProtocolServer = new HddsDatanodeClientProtocolServer(
datanodeDetails, conf, HddsVersionInfo.HDDS_VERSION_INFO,
reconfigurationHandler);

int clientRpcPort = clientProtocolServer.getClientRpcAddress().getPort();
serviceRuntimeInfo.setClientRpcPort(String.valueOf(clientRpcPort));

// Get admin list
String starterUser =
UserGroupInformation.getCurrentUser().getShortUserName();
ContainerSet.java
@@ -251,6 +251,21 @@ public Iterator<Container<?>> getContainerIterator(HddsVolume volume) {
.iterator();
}

/**
* Get the number of containers stored on the given volume.
*
* @param volume hdds volume.
* @return number of containers
*/
public long containerCount(HddsVolume volume) {
Preconditions.checkNotNull(volume);
Preconditions.checkNotNull(volume.getStorageID());
String volumeUuid = volume.getStorageID();
return containerMap.values().stream()
.filter(x -> volumeUuid.equals(x.getContainerData().getVolume()
.getStorageID())).count();
}
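A plausible counterpart (not shown in this diff) is for ContainerController to expose a per-volume count that delegates to this method; HddsVolume.getContainers() below calls controller.getContainerCount(volume), so the delegation is assumed to look roughly like this (the containerSet field and method placement are assumptions):

// Hypothetical sketch of the ContainerController side; only the call from
// HddsVolume.getContainers() is visible in this diff.
public long getContainerCount(HddsVolume volume) {
  return containerSet.containerCount(volume);
}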

/**
* Return a containerMap iterator over {@link ContainerSet#containerMap}.
* @return containerMap Iterator
HddsVolume.java
@@ -29,13 +29,15 @@
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hdds.annotation.InterfaceAudience;
import org.apache.hadoop.hdds.annotation.InterfaceStability;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
import org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache;
import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil;
import org.apache.hadoop.ozone.container.common.utils.RawDB;
import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures;
import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures.SchemaV3;
import org.apache.hadoop.util.Time;
@@ -44,6 +46,7 @@

import jakarta.annotation.Nullable;

import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_DATANODE_IO_METRICS_PERCENTILES_INTERVALS_SECONDS_KEY;
import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_DB_NAME;
import static org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil.initPerDiskDBStore;

@@ -80,6 +83,8 @@ public class HddsVolume extends StorageVolume {
private final VolumeIOStats volumeIOStats;
private final VolumeInfoMetrics volumeInfoMetrics;

private ContainerController controller;

private final AtomicLong committedBytes = new AtomicLong(); // till Open containers become full

// Mentions the type of volume
@@ -119,8 +124,10 @@ private HddsVolume(Builder b) throws IOException {

if (!b.getFailedVolume() && getVolumeInfo().isPresent()) {
this.setState(VolumeState.NOT_INITIALIZED);
ConfigurationSource conf = getConf();
int[] intervals = conf.getInts(OZONE_DATANODE_IO_METRICS_PERCENTILES_INTERVALS_SECONDS_KEY);
this.volumeIOStats = new VolumeIOStats(b.getVolumeRootStr(),
this.getStorageDir().toString());
this.getStorageDir().toString(), intervals);
this.volumeInfoMetrics =
new VolumeInfoMetrics(b.getVolumeRootStr(), this);

@@ -382,6 +389,17 @@ public void loadDbStore(boolean readOnly) throws IOException {
getStorageID());
}

public void setController(ContainerController controller) {
this.controller = controller;
}

public long getContainers() {
if (controller != null) {
return controller.getContainerCount(this);
}
return 0;
}
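getContainers() only reports a non-zero value once a ContainerController has been injected via setController(); the wiring is not shown in this diff, but something along these lines would be needed during datanode startup (the variable names and call site are assumptions):

// Hypothetical wiring sketch: hand each HddsVolume a ContainerController so
// the per-volume container-count metric has a data source.
for (StorageVolume vol : volumeSet.getVolumesList()) {
  if (vol instanceof HddsVolume) {
    ((HddsVolume) vol).setController(containerController);
  }
}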

/**
* Pick a DbVolume for HddsVolume and init db instance.
* Use the HddsVolume directly if no DbVolume found.
VolumeIOStats.java
@@ -21,20 +21,34 @@
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import org.apache.hadoop.metrics2.lib.MutableRate;

/**
* This class is used to track Volume IO stats for each HDDS Volume.
*/
public class VolumeIOStats {
private String metricsSourceName = VolumeIOStats.class.getSimpleName();
private String storageDirectory;
private @Metric MutableCounterLong readBytes;
private @Metric MutableCounterLong readOpCount;
private @Metric MutableCounterLong writeBytes;
private @Metric MutableCounterLong writeOpCount;
private @Metric MutableCounterLong readTime;
private @Metric MutableCounterLong writeTime;
private final MetricsRegistry registry = new MetricsRegistry("VolumeIOStats");
@Metric
private MutableCounterLong readBytes;
@Metric
private MutableCounterLong readOpCount;
@Metric
private MutableCounterLong writeBytes;
@Metric
private MutableCounterLong writeOpCount;
@Metric
private MutableRate readTime;
@Metric
private MutableQuantiles[] readLatencyQuantiles = new MutableQuantiles[0]; // empty by default so incReadTime() never NPEs
@Metric
private MutableRate writeTime;
@Metric
private MutableQuantiles[] writeLatencyQuantiles = new MutableQuantiles[0]; // empty by default so incWriteTime() never NPEs

@Deprecated
public VolumeIOStats() {
@@ -44,9 +58,24 @@ public VolumeIOStats() {
/**
* @param identifier Typically, path to volume root. e.g. /data/hdds
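* @param storageDirectory path of the volume's storage directory
* @param intervals intervals (in seconds) over which read/write latency quantiles are computed; may be null or empty to disable them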
*/
public VolumeIOStats(String identifier, String storageDirectory) {
public VolumeIOStats(String identifier, String storageDirectory, int[] intervals) {
this.metricsSourceName += '-' + identifier;
this.storageDirectory = storageDirectory;

// Try initializing `readLatencyQuantiles` and `writeLatencyQuantiles`
if (intervals != null && intervals.length > 0) {
final int length = intervals.length;
readLatencyQuantiles = new MutableQuantiles[intervals.length];
writeLatencyQuantiles = new MutableQuantiles[intervals.length];
for (int i = 0; i < length; i++) {
readLatencyQuantiles[i] = registry.newQuantiles(
"readLatency" + intervals[i] + "s",
"Read Data File Io Latency in ms", "ops", "latency", intervals[i]);
writeLatencyQuantiles[i] = registry.newQuantiles(
"writeLatency" + intervals[i] + "s",
"Write Data File Io Latency in ms", "ops", "latency", intervals[i]);
}
}
init();
}

@@ -99,15 +128,21 @@ public void incWriteOpCount() {
* @param time
*/
public void incReadTime(long time) {
readTime.incr(time);
readTime.add(time);
for (MutableQuantiles q : readLatencyQuantiles) {
q.add(time);
}
}

/**
* Increment the time taken by write operation on the volume.
* @param time
*/
public void incWriteTime(long time) {
writeTime.incr(time);
writeTime.add(time);
for (MutableQuantiles q : writeLatencyQuantiles) {
q.add(time);
}
}

/**
@@ -147,15 +182,15 @@ public long getWriteOpCount() {
* @return long
*/
public long getReadTime() {
return readTime.value();
return (long) readTime.lastStat().total();
}

/**
* Returns total write operations time on the volume.
* @return long
*/
public long getWriteTime() {
return writeTime.value();
return (long) writeTime.lastStat().total();
}

@Metric
VolumeInfoMetrics.java
@@ -37,6 +37,7 @@ public class VolumeInfoMetrics {
private final HddsVolume volume;
@Metric("Returns the RocksDB compact times of the Volume")
private MutableRate dbCompactLatency;
private long containers;

/**
* @param identifier Typically, path to volume root. E.g. /data/hdds
@@ -153,4 +154,11 @@ public void dbCompactTimesNanoSecondsIncr(long time) {
dbCompactLatency.add(time);
}

/**
* Return the Container Count of the Volume.
*/
@Metric("Returns the Container Count of the Volume")
public long getContainers() {
return volume.getContainers();
}
}
BackgroundContainerDataScanner.java
@@ -63,6 +63,7 @@ public BackgroundContainerDataScanner(ContainerScannerConfiguration conf,
throttler = new HddsDataTransferThrottler(conf.getBandwidthPerVolume());
canceler = new Canceler();
this.metrics = ContainerDataScannerMetrics.create(volume.toString());
this.metrics.setStorageDirectory(volume.toString());
this.minScanGap = conf.getContainerScanMinGap();
}

(The remaining changed files in this commit are not shown.)