From a019322e302e922f28bf0370bc9b73b7c97456c5 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 19 Sep 2024 14:44:32 +0530 Subject: [PATCH] remote publication checksum stats (#15957) (#15960) * Remote publication checksum stats Signed-off-by: Himshikha Gupta --- .../remote/RemoteStatePublicationIT.java | 19 ++++++++++ .../remote/RemoteClusterStateService.java | 6 ++++ .../gateway/remote/RemoteDownloadStats.java | 36 +++++++++++++++++++ .../remote/RemotePersistenceStats.java | 24 ++++++++++--- .../RemoteClusterStateServiceTests.java | 10 ++++++ 5 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java index 612ca7d543647..e065a2e0e3151 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java @@ -42,6 +42,7 @@ import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; +import static org.opensearch.gateway.remote.RemoteDownloadStats.CHECKSUM_VALIDATION_FAILED_COUNT; import static org.opensearch.gateway.remote.model.RemoteClusterBlocks.CLUSTER_BLOCKS; import static org.opensearch.gateway.remote.model.RemoteCoordinationMetadata.COORDINATION_METADATA; import static org.opensearch.gateway.remote.model.RemoteCustomMetadata.CUSTOM_METADATA; @@ -228,10 +229,28 @@ private void assertDataNodeDownloadStats(NodesStatsResponse nodesStatsResponse) assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getSuccessCount() > 0); assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getFailedCount()); assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getTotalTimeInMillis() > 0); + assertEquals( + 0, + dataNodeDiscoveryStats.getClusterStateStats() + .getPersistenceStats() + .get(0) + .getExtendedFields() + .get(CHECKSUM_VALIDATION_FAILED_COUNT) + .get() + ); assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getSuccessCount() > 0); assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getFailedCount()); assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getTotalTimeInMillis() > 0); + assertEquals( + 0, + dataNodeDiscoveryStats.getClusterStateStats() + .getPersistenceStats() + .get(1) + .getExtendedFields() + .get(CHECKSUM_VALIDATION_FAILED_COUNT) + .get() + ); } private Map getMetadataFiles(BlobStoreRepository repository, String subDirectory) throws IOException { diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index e504c5abb46d3..bb2825c97bb3e 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -1619,6 +1619,12 @@ void validateClusterStateFromChecksum( failedValidation ) ); + if (isFullStateDownload) { + remoteStateStats.stateFullDownloadValidationFailed(); + } else { + remoteStateStats.stateDiffDownloadValidationFailed(); + } + if (isFullStateDownload && remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE)) { throw new IllegalStateException( "Cluster state checksums do not match during full state read. Validation failed for " + failedValidation diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java new file mode 100644 index 0000000000000..a8f4b33a19c37 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.coordination.PersistedStateStats; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Download stats for remote state + * + * @opensearch.internal + */ +public class RemoteDownloadStats extends PersistedStateStats { + static final String CHECKSUM_VALIDATION_FAILED_COUNT = "checksum_validation_failed_count"; + private AtomicLong checksumValidationFailedCount = new AtomicLong(0); + + public RemoteDownloadStats(String statsName) { + super(statsName); + addToExtendedFields(CHECKSUM_VALIDATION_FAILED_COUNT, checksumValidationFailedCount); + } + + public void checksumValidationFailedCount() { + checksumValidationFailedCount.incrementAndGet(); + } + + public long getChecksumValidationFailedCount() { + return checksumValidationFailedCount.get(); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java index 417ebdafd3ba7..11f26ac8b3ed9 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java @@ -18,16 +18,16 @@ public class RemotePersistenceStats { RemoteUploadStats remoteUploadStats; - PersistedStateStats remoteDiffDownloadStats; - PersistedStateStats remoteFullDownloadStats; + RemoteDownloadStats remoteDiffDownloadStats; + RemoteDownloadStats remoteFullDownloadStats; final String FULL_DOWNLOAD_STATS = "remote_full_download"; final String DIFF_DOWNLOAD_STATS = "remote_diff_download"; public RemotePersistenceStats() { remoteUploadStats = new RemoteUploadStats(); - remoteDiffDownloadStats = new PersistedStateStats(DIFF_DOWNLOAD_STATS); - remoteFullDownloadStats = new PersistedStateStats(FULL_DOWNLOAD_STATS); + remoteDiffDownloadStats = new RemoteDownloadStats(DIFF_DOWNLOAD_STATS); + remoteFullDownloadStats = new RemoteDownloadStats(FULL_DOWNLOAD_STATS); } public void cleanUpAttemptFailed() { @@ -90,6 +90,22 @@ public void stateDiffDownloadFailed() { remoteDiffDownloadStats.stateFailed(); } + public void stateDiffDownloadValidationFailed() { + remoteDiffDownloadStats.checksumValidationFailedCount(); + } + + public void stateFullDownloadValidationFailed() { + remoteFullDownloadStats.checksumValidationFailedCount(); + } + + public long getStateDiffDownloadValidationFailed() { + return remoteDiffDownloadStats.getChecksumValidationFailedCount(); + } + + public long getStateFullDownloadValidationFailed() { + return remoteFullDownloadStats.getChecksumValidationFailedCount(); + } + public PersistedStateStats getUploadStats() { return remoteUploadStats; } diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 79f6c13bd694f..a6887f1dc1124 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -3232,6 +3232,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithNullC anyString(), anyBoolean() ); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws IOException { @@ -3264,6 +3265,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws ); mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); verify(mockService, times(1)).validateClusterStateFromChecksum(manifest, clusterState, ClusterName.DEFAULT.value(), NODE_ID, true); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateForManifestWithChecksumValidationModeNone() throws IOException { @@ -3296,6 +3298,7 @@ public void testGetClusterStateForManifestWithChecksumValidationModeNone() throw ); mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); verify(mockService, times(0)).validateClusterStateFromChecksum(any(), any(), anyString(), anyString(), anyBoolean()); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMismatch() throws IOException { @@ -3338,6 +3341,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMisma NODE_ID, true ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatch() throws IOException { @@ -3384,6 +3388,7 @@ public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatc NODE_ID, true ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksum() throws IOException { @@ -3425,6 +3430,7 @@ public void testGetClusterStateUsingDiffWithChecksum() throws IOException { eq(NODE_ID), eq(false) ); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOException { @@ -3466,6 +3472,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOExceptio eq(NODE_ID), eq(false) ); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws IOException { @@ -3506,6 +3513,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws I eq(NODE_ID), eq(false) ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws IOException { @@ -3567,6 +3575,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws I eq(NODE_ID), eq(false) ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOException { @@ -3628,6 +3637,7 @@ public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOExceptio eq(NODE_ID), eq(false) ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException {