Skip to content

Commit

Permalink
HDDS-9200. [Snapshot] Added logs and metrics for snapshot purge and s…
Browse files Browse the repository at this point in the history
…et property APIs (#6453)
  • Loading branch information
hemantk-12 authored Apr 3, 2024
1 parent 7da5ecb commit 3467db1
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ public class OMMetrics implements OmMetadataReaderMetrics {
private @Metric MutableCounterLong numSnapshotLists;
private @Metric MutableCounterLong numSnapshotDiffJobs;
private @Metric MutableCounterLong numSnapshotInfos;
private @Metric MutableCounterLong numSnapshotPurges;
private @Metric MutableCounterLong numSnapshotSetProperties;

private @Metric MutableCounterLong numGetFileStatus;
private @Metric MutableCounterLong numCreateDirectory;
Expand Down Expand Up @@ -136,6 +138,8 @@ public class OMMetrics implements OmMetadataReaderMetrics {
private @Metric MutableCounterLong numSnapshotListFails;
private @Metric MutableCounterLong numSnapshotDiffJobFails;
private @Metric MutableCounterLong numSnapshotInfoFails;
private @Metric MutableCounterLong numSnapshotPurgeFails;
private @Metric MutableCounterLong numSnapshotSetPropertyFails;

private @Metric MutableCounterLong numSnapshotActive;
private @Metric MutableCounterLong numSnapshotDeleted;
Expand Down Expand Up @@ -479,6 +483,14 @@ public void incNumSnapshotInfos() {
numSnapshotInfos.incr();
}

/**
 * Bumps the {@code numSnapshotPurges} counter; invoked once per snapshot purge request
 * that completes without error.
 */
public void incNumSnapshotPurges() {
  this.numSnapshotPurges.incr();
}

/**
 * Bumps the {@code numSnapshotSetProperties} counter; invoked once per snapshot
 * set-property request that completes without error.
 */
public void incNumSnapshotSetProperties() {
  this.numSnapshotSetProperties.incr();
}

/**
 * Bumps the {@code numSnapshotDiffJobs} counter.
 */
public void incNumSnapshotDiffJobs() {
  this.numSnapshotDiffJobs.incr();
}
Expand All @@ -494,6 +506,15 @@ public void incNumSnapshotDiffJobFails() {
/**
 * Bumps the {@code numSnapshotInfoFails} counter.
 */
public void incNumSnapshotInfoFails() {
  this.numSnapshotInfoFails.incr();
}

/**
 * Bumps the {@code numSnapshotPurgeFails} counter; invoked once per snapshot purge
 * request that ends in an {@code IOException}.
 */
public void incNumSnapshotPurgeFails() {
  this.numSnapshotPurgeFails.incr();
}

/**
 * Bumps the {@code numSnapshotSetPropertyFails} counter; invoked once per snapshot
 * set-property request that ends in an {@code IOException}.
 */
public void incNumSnapshotSetPropertyFails() {
  this.numSnapshotSetPropertyFails.incr();
}

public void setNumSnapshotActive(long num) {
long currVal = numSnapshotActive.value();
numSnapshotActive.incr(num - currVal);
Expand Down Expand Up @@ -1290,6 +1311,14 @@ public long getNumSnapshotDiffJobs() {
return numSnapshotDiffJobs.value();
}

/**
 * @return the current value of the {@code numSnapshotPurges} counter.
 */
public long getNumSnapshotPurges() {
  return this.numSnapshotPurges.value();
}

/**
 * @return the current value of the {@code numSnapshotSetProperties} counter.
 */
public long getNumSnapshotSetProperties() {
  return this.numSnapshotSetProperties.value();
}

/**
 * @return the current value of the {@code numSnapshotCreateFails} counter.
 */
public long getNumSnapshotCreateFails() {
  return this.numSnapshotCreateFails.value();
}
Expand All @@ -1314,6 +1343,13 @@ public long getNumSnapshotDeleted() {
return numSnapshotDeleted.value();
}

/**
 * @return the current value of the {@code numSnapshotPurgeFails} counter.
 */
public long getNumSnapshotPurgeFails() {
  return this.numSnapshotPurgeFails.value();
}

/**
 * @return the current value of the {@code numSnapshotSetPropertyFails} counter.
 */
public long getNumSnapshotSetPropertyFails() {
  return this.numSnapshotSetPropertyFails.value();
}

public void incNumTrashRenames() {
numTrashRenames.incr();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
Expand Down Expand Up @@ -66,6 +67,8 @@ public OMSnapshotPurgeRequest(OMRequest omRequest) {

@Override
public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIndex termIndex) {
OMMetrics omMetrics = ozoneManager.getMetrics();

final long trxnLogIndex = termIndex.getIndex();

OmSnapshotManager omSnapshotManager = ozoneManager.getOmSnapshotManager();
Expand Down Expand Up @@ -150,9 +153,16 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
omClientResponse = new OMSnapshotPurgeResponse(omResponse.build(),
snapshotDbKeys, updatedSnapInfos,
updatedPathPreviousAndGlobalSnapshots);

omMetrics.incNumSnapshotPurges();
LOG.info("Successfully executed snapshotPurgeRequest: {{}} along with updating deep clean flags for " +
"snapshots: {} and global and previous for snapshots:{}.",
snapshotPurgeRequest, updatedSnapInfos.keySet(), updatedPathPreviousAndGlobalSnapshots.keySet());
} catch (IOException ex) {
omClientResponse = new OMSnapshotPurgeResponse(
createErrorOMResponse(omResponse, ex));
omMetrics.incNumSnapshotPurgeFails();
LOG.error("Failed to execute snapshotPurgeRequest:{{}}.", snapshotPurgeRequest, ex);
}

return omClientResponse;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.ozone.om.request.snapshot;

import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
Expand Down Expand Up @@ -52,6 +53,7 @@ public OMSnapshotSetPropertyRequest(OMRequest omRequest) {

@Override
public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIndex termIndex) {
OMMetrics omMetrics = ozoneManager.getMetrics();

OMClientResponse omClientResponse = null;
OMMetadataManager metadataManager = ozoneManager.getMetadataManager();
Expand Down Expand Up @@ -117,9 +119,13 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
CacheValue.get(termIndex.getIndex(), updatedSnapInfo));
omClientResponse = new OMSnapshotSetPropertyResponse(
omResponse.build(), updatedSnapInfo);
omMetrics.incNumSnapshotSetProperties();
LOG.info("Successfully executed snapshotSetPropertyRequest: {{}}.", setSnapshotPropertyRequest);
} catch (IOException ex) {
omClientResponse = new OMSnapshotSetPropertyResponse(
createErrorOMResponse(omResponse, ex));
omMetrics.incNumSnapshotSetPropertyFails();
LOG.error("Failed to execute snapshotSetPropertyRequest: {{}}.", setSnapshotPropertyRequest, ex);
} finally {
if (acquiredSnapshotLock) {
mergeOmLockDetails(metadataManager.getLock()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.utils.db.BatchOperation;
import org.apache.hadoop.hdds.utils.db.RDBStore;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.audit.AuditLogger;
import org.apache.hadoop.ozone.om.IOmMetadataReader;
Expand Down Expand Up @@ -63,6 +64,7 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Status.INTERNAL_ERROR;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
Expand Down Expand Up @@ -233,6 +235,8 @@ private void purgeSnapshots(OMRequest snapshotPurgeRequest)

@Test
public void testValidateAndUpdateCache() throws Exception {
long initialSnapshotPurgeCount = omMetrics.getNumSnapshotPurges();
long initialSnapshotPurgeFailCount = omMetrics.getNumSnapshotPurgeFails();

List<String> snapshotDbKeysToPurge = createSnapshots(10);
assertFalse(omMetadataManager.getSnapshotInfoTable().isEmpty());
Expand Down Expand Up @@ -260,6 +264,36 @@ public void testValidateAndUpdateCache() throws Exception {
for (Path checkpoint : checkpointPaths) {
assertFalse(Files.exists(checkpoint));
}
assertEquals(initialSnapshotPurgeCount + 1, omMetrics.getNumSnapshotPurges());
assertEquals(initialSnapshotPurgeFailCount, omMetrics.getNumSnapshotPurgeFails());
}

/**
 * Failure-path check for snapshot purge: every lookup on the snapshot info table is made to
 * throw an {@link IOException}, and the test then asserts that the response status is
 * INTERNAL_ERROR, the purge-failure metric grew by one, and the purge-success metric did not move.
 */
@Test
public void testValidateAndUpdateCacheFailure() throws Exception {
  final long purgesBefore = omMetrics.getNumSnapshotPurges();
  final long purgeFailsBefore = omMetrics.getNumSnapshotPurgeFails();

  final List<String> dbKeysToPurge = createSnapshots(10);

  // Swap in a metadata manager whose snapshot info table fails on every get().
  Table<String, SnapshotInfo> failingTable = mock(Table.class);
  when(failingTable.get(anyString())).thenThrow(new IOException("Injected fault error."));
  OmMetadataManagerImpl failingMetadataManager = mock(OmMetadataManagerImpl.class);
  when(failingMetadataManager.getSnapshotInfoTable()).thenReturn(failingTable);
  when(ozoneManager.getMetadataManager()).thenReturn(failingMetadataManager);

  OMSnapshotPurgeRequest purgeRequest = preExecute(createPurgeKeysRequest(dbKeysToPurge));
  OMSnapshotPurgeResponse purgeResponse =
      (OMSnapshotPurgeResponse) purgeRequest.validateAndUpdateCache(ozoneManager, 200L);

  assertEquals(INTERNAL_ERROR, purgeResponse.getOMResponse().getStatus());
  // Only the failure counter may change.
  assertEquals(purgesBefore, omMetrics.getNumSnapshotPurges());
  assertEquals(purgeFailsBefore + 1, omMetrics.getNumSnapshotPurgeFails());
}

// TODO: clean up: Do we need this test after
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.om.OMConfigKeys;
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
import org.apache.hadoop.ozone.om.OzoneManager;
import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
Expand All @@ -48,6 +49,7 @@
import java.util.List;
import java.util.UUID;

import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Status.INTERNAL_ERROR;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.mockito.Mockito.anyString;
Expand All @@ -62,7 +64,7 @@ public class TestOMSnapshotSetPropertyRequestAndResponse {
private BatchOperation batchOperation;
private OzoneManager ozoneManager;
private OMMetadataManager omMetadataManager;

private OMMetrics omMetrics;
private String volumeName;
private String bucketName;
private String snapName;
Expand All @@ -71,6 +73,7 @@ public class TestOMSnapshotSetPropertyRequestAndResponse {

@BeforeEach
void setup(@TempDir File testDir) throws Exception {
omMetrics = OMMetrics.create();
ozoneManager = mock(OzoneManager.class);
OMLayoutVersionManager lvm = mock(OMLayoutVersionManager.class);
when(lvm.isAllowed(anyString())).thenReturn(true);
Expand All @@ -84,6 +87,7 @@ void setup(@TempDir File testDir) throws Exception {
omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration,
ozoneManager);
when(ozoneManager.getMetadataManager()).thenReturn(omMetadataManager);
when(ozoneManager.getMetrics()).thenReturn(omMetrics);

volumeName = UUID.randomUUID().toString();
bucketName = UUID.randomUUID().toString();
Expand All @@ -94,6 +98,9 @@ void setup(@TempDir File testDir) throws Exception {

@Test
public void testValidateAndUpdateCache() throws IOException {
long initialSnapshotSetPropertyCount = omMetrics.getNumSnapshotSetProperties();
long initialSnapshotSetPropertyFailCount = omMetrics.getNumSnapshotSetPropertyFails();

createSnapshotDataForTest();
assertFalse(omMetadataManager.getSnapshotInfoTable().isEmpty());
List<OMRequest> snapshotUpdateSizeRequests =
Expand All @@ -120,6 +127,9 @@ public void testValidateAndUpdateCache() throws IOException {
omMetadataManager.getStore().commitBatchOperation(batchOperation);
}

assertEquals(initialSnapshotSetPropertyCount + snapshotUpdateSizeRequests.size(),
omMetrics.getNumSnapshotSetProperties());
assertEquals(initialSnapshotSetPropertyFailCount, omMetrics.getNumSnapshotSetPropertyFails());
// Check if the exclusive size is set.
try (TableIterator<String, ? extends Table.KeyValue<String, SnapshotInfo>>
iterator = omMetadataManager.getSnapshotInfoTable().iterator()) {
Expand All @@ -134,6 +144,42 @@ public void testValidateAndUpdateCache() throws IOException {
}
}

/**
 * Failure-path check for snapshot set-property: every lookup on the snapshot info table is made
 * to throw an {@link IOException}. Each request must then come back with INTERNAL_ERROR, the
 * failure metric must grow by the number of requests, and the success metric must not move.
 */
@Test
public void testValidateAndUpdateCacheFailure() throws IOException {
  final long successesBefore = omMetrics.getNumSnapshotSetProperties();
  final long failuresBefore = omMetrics.getNumSnapshotSetPropertyFails();

  createSnapshotDataForTest();
  assertFalse(omMetadataManager.getSnapshotInfoTable().isEmpty());
  final List<OMRequest> requests = createSnapshotUpdateSizeRequest();

  // Swap in a metadata manager whose snapshot info table fails on every get().
  Table<String, SnapshotInfo> failingTable = mock(Table.class);
  when(failingTable.get(anyString())).thenThrow(new IOException("Injected fault error."));
  OmMetadataManagerImpl failingMetadataManager = mock(OmMetadataManagerImpl.class);
  when(failingMetadataManager.getSnapshotInfoTable()).thenReturn(failingTable);
  when(ozoneManager.getMetadataManager()).thenReturn(failingMetadataManager);

  for (OMRequest rawRequest : requests) {
    OMRequest preExecuted = new OMSnapshotSetPropertyRequest(rawRequest).preExecute(ozoneManager);
    OMSnapshotSetPropertyRequest setPropertyRequest = new OMSnapshotSetPropertyRequest(preExecuted);

    // Validate and update cache; the injected fault must surface as an error response.
    OMSnapshotSetPropertyResponse setPropertyResponse =
        (OMSnapshotSetPropertyResponse) setPropertyRequest.validateAndUpdateCache(ozoneManager, 200L);

    assertEquals(INTERNAL_ERROR, setPropertyResponse.getOMResponse().getStatus());
  }

  // Only the failure counter may change, once per request.
  assertEquals(successesBefore, omMetrics.getNumSnapshotSetProperties());
  assertEquals(failuresBefore + requests.size(), omMetrics.getNumSnapshotSetPropertyFails());
}

private void assertCacheValues(String dbKey) {
CacheValue<SnapshotInfo> cacheValue = omMetadataManager
.getSnapshotInfoTable()
Expand Down

0 comments on commit 3467db1

Please sign in to comment.