Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDDS-10206. Expose jmx metrics for snapshot cache size on the ozone manager. #6138

Merged
merged 15 commits into from
Apr 5, 2024
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;

/**
* This class is for maintaining Ozone Manager statistics.
Expand Down Expand Up @@ -77,6 +78,7 @@ public class OMMetrics implements OmMetadataReaderMetrics {
private @Metric MutableCounterLong numSnapshotPurges;
private @Metric MutableCounterLong numSnapshotSetProperties;

private @Metric MutableGaugeInt numSnapshotCacheSize;
private @Metric MutableCounterLong numGetFileStatus;
private @Metric MutableCounterLong numCreateDirectory;
private @Metric MutableCounterLong numCreateFile;
Expand Down Expand Up @@ -541,6 +543,17 @@ public void decNumSnapshotDeleted() {
numSnapshotDeleted.incr(-1);
}

/**
 * Returns the current value of the {@code numSnapshotCacheSize} gauge.
 *
 * @return the value reported by {@code numSnapshotCacheSize.value()}
 */
public int getNumSnapshotCacheSize() {
return numSnapshotCacheSize.value();
}
public void incNumSnapshotCacheSize() {
numSnapshotCacheSize.incr();
ceekay47 marked this conversation as resolved.
Show resolved Hide resolved
}

/**
 * Decrements the {@code numSnapshotCacheSize} gauge via {@code decr()}.
 */
public void decNumSnapshotCacheSize() {
numSnapshotCacheSize.decr();
}

/**
 * Increments the {@code numCompleteMultipartUploadFails} metric via {@code incr()}.
 */
public void incNumCompleteMultipartUploadFails() {
numCompleteMultipartUploadFails.incr();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ public OmSnapshotManager(OzoneManager ozoneManager) {
};

// Init snapshot cache
this.snapshotCache = new SnapshotCache(loader, softCacheSize);
this.snapshotCache = new SnapshotCache(loader, softCacheSize, ozoneManager.getMetrics());

this.snapshotDiffManager = new SnapshotDiffManager(snapshotDiffDb, differ,
ozoneManager, snapDiffJobCf, snapDiffReportCf,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.CacheLoader;
import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.OmSnapshot;
import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.slf4j.Logger;
Expand Down Expand Up @@ -51,10 +52,13 @@ public class SnapshotCache {
// opened on the OM.
private final int cacheSizeLimit;

public SnapshotCache(CacheLoader<UUID, OmSnapshot> cacheLoader, int cacheSizeLimit) {
private final OMMetrics omMetrics;

/**
 * Creates a snapshot cache backed by a new, empty {@link ConcurrentHashMap}.
 *
 * @param cacheLoader loader kept in {@code this.cacheLoader}
 * @param cacheSizeLimit value kept in {@code this.cacheSizeLimit}
 * @param omMetrics metrics object kept in {@code this.omMetrics}
 */
public SnapshotCache(CacheLoader<UUID, OmSnapshot> cacheLoader, int cacheSizeLimit, OMMetrics omMetrics) {
  // Pure field wiring: collaborators first, then the initially empty backing map.
  this.omMetrics = omMetrics;
  this.cacheSizeLimit = cacheSizeLimit;
  this.cacheLoader = cacheLoader;
  this.dbMap = new ConcurrentHashMap<>();
}

@VisibleForTesting
Expand Down Expand Up @@ -83,6 +87,7 @@ public void invalidate(UUID key) throws IOException {
} catch (IOException e) {
throw new IllegalStateException("Failed to close snapshotId: " + key, e);
}
omMetrics.decNumSnapshotCacheSize();
}
return null;
});
Expand All @@ -104,6 +109,7 @@ public void invalidateAll() {
throw new IllegalStateException("Failed to close snapshot", e);
}
it.remove();
omMetrics.decNumSnapshotCacheSize();
}
}

Expand Down Expand Up @@ -150,14 +156,14 @@ public ReferenceCounted<OmSnapshot> get(UUID key) throws IOException {
// Unexpected and unknown exception thrown from CacheLoader#load
throw new IllegalStateException(ex);
}
omMetrics.incNumSnapshotCacheSize();
}
if (v != null) {
// When RC OmSnapshot is successfully loaded
v.incrementRefCount();
}
return v;
});

if (rcOmSnapshot == null) {
// The only exception that would fall through the loader logic above
// is OMException with FILE_NOT_FOUND.
Expand Down Expand Up @@ -227,6 +233,7 @@ private void cleanupInternal() {
} catch (IOException ex) {
throw new IllegalStateException("Error while closing snapshot DB.", ex);
}
omMetrics.decNumSnapshotCacheSize();
return null;
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.hadoop.ozone.om.snapshot;

import com.google.common.cache.CacheLoader;
import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.OmSnapshot;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.AfterEach;
Expand Down Expand Up @@ -52,6 +53,8 @@ class TestSnapshotCache {
private static CacheLoader<UUID, OmSnapshot> cacheLoader;
private SnapshotCache snapshotCache;

private OMMetrics omMetrics;

@BeforeAll
static void beforeAll() throws Exception {
cacheLoader = mock(CacheLoader.class);
Expand All @@ -74,7 +77,8 @@ static void beforeAll() throws Exception {
@BeforeEach
void setUp() {
// Reset cache for each test case
snapshotCache = new SnapshotCache(cacheLoader, CACHE_SIZE_LIMIT);
omMetrics = OMMetrics.create();
ceekay47 marked this conversation as resolved.
Show resolved Hide resolved
snapshotCache = new SnapshotCache(cacheLoader, CACHE_SIZE_LIMIT, omMetrics);
}

@AfterEach
Expand All @@ -87,11 +91,13 @@ void tearDown() {
@DisplayName("get()")
void testGet() throws IOException {
  // Before anything is fetched the gauge reads zero.
  assertEquals(0, omMetrics.getNumSnapshotCacheSize());

  final UUID snapshotId = UUID.randomUUID();
  ReferenceCounted<OmSnapshot> loaded = snapshotCache.get(snapshotId);

  // The returned handle and the snapshot it wraps must both be present.
  assertNotNull(loaded);
  assertNotNull(loaded.get());
  assertInstanceOf(OmSnapshot.class, loaded.get());

  // One get() leaves one cache entry, and the gauge reports the same count.
  assertEquals(1, snapshotCache.size());
  assertEquals(1, omMetrics.getNumSnapshotCacheSize());
}

@Test
Expand All @@ -101,12 +107,14 @@ void testGetTwice() throws IOException {
ReferenceCounted<OmSnapshot> omSnapshot1 = snapshotCache.get(dbKey1);
assertNotNull(omSnapshot1);
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());

ReferenceCounted<OmSnapshot> omSnapshot1again = snapshotCache.get(dbKey1);
// Should be the same instance
assertEquals(omSnapshot1, omSnapshot1again);
assertEquals(omSnapshot1.get(), omSnapshot1again.get());
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
}

@Test
Expand All @@ -117,10 +125,12 @@ void testReleaseByDbKey() throws IOException {
assertNotNull(omSnapshot1);
assertNotNull(omSnapshot1.get());
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());

snapshotCache.release(dbKey1);
// Entry will not be immediately evicted
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
}

@Test
Expand All @@ -130,13 +140,16 @@ void testInvalidate() throws IOException {
ReferenceCounted<OmSnapshot> omSnapshot = snapshotCache.get(dbKey1);
assertNotNull(omSnapshot);
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());

snapshotCache.release(dbKey1);
// Entry will not be immediately evicted
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());

snapshotCache.invalidate(dbKey1);
assertEquals(0, snapshotCache.size());
assertEquals(0, omMetrics.getNumSnapshotCacheSize());
}

@Test
Expand All @@ -146,28 +159,34 @@ void testInvalidateAll() throws IOException {
ReferenceCounted<OmSnapshot> omSnapshot1 = snapshotCache.get(dbKey1);
assertNotNull(omSnapshot1);
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey2 = UUID.randomUUID();
ReferenceCounted<OmSnapshot> omSnapshot2 = snapshotCache.get(dbKey2);
assertNotNull(omSnapshot2);
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());
// Should be different omSnapshot instances
assertNotEquals(omSnapshot1, omSnapshot2);

final UUID dbKey3 = UUID.randomUUID();
ReferenceCounted<OmSnapshot> omSnapshot3 = snapshotCache.get(dbKey3);
assertNotNull(omSnapshot3);
assertEquals(3, snapshotCache.size());
assertEquals(3, omMetrics.getNumSnapshotCacheSize());

snapshotCache.release(dbKey1);
// Entry will not be immediately evicted
assertEquals(3, snapshotCache.size());
assertEquals(3, omMetrics.getNumSnapshotCacheSize());

snapshotCache.invalidate(dbKey1);
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());

snapshotCache.invalidateAll();
assertEquals(0, snapshotCache.size());
assertEquals(0, omMetrics.getNumSnapshotCacheSize());
}

private void assertEntryExistence(UUID key, boolean shouldExist) {
Expand All @@ -191,26 +210,33 @@ void testEviction1() throws IOException {
final UUID dbKey1 = UUID.randomUUID();
snapshotCache.get(dbKey1);
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
snapshotCache.release(dbKey1);
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey2 = UUID.randomUUID();
snapshotCache.get(dbKey2);
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());
snapshotCache.release(dbKey2);
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey3 = UUID.randomUUID();
snapshotCache.get(dbKey3);
assertEquals(3, snapshotCache.size());
assertEquals(3, omMetrics.getNumSnapshotCacheSize());
snapshotCache.release(dbKey3);
assertEquals(3, snapshotCache.size());
assertEquals(3, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey4 = UUID.randomUUID();
snapshotCache.get(dbKey4);
// dbKey1, dbKey2 and dbKey3 would have been evicted by the end of the last get() because
// those were release()d.
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
assertEntryExistence(dbKey1, false);
}

Expand All @@ -221,25 +247,30 @@ void testEviction2() throws IOException {
final UUID dbKey1 = UUID.randomUUID();
snapshotCache.get(dbKey1);
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey2 = UUID.randomUUID();
snapshotCache.get(dbKey2);
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey3 = UUID.randomUUID();
snapshotCache.get(dbKey3);
assertEquals(3, snapshotCache.size());
assertEquals(3, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey4 = UUID.randomUUID();
snapshotCache.get(dbKey4);
// dbKey1 would not have been evicted because it is not release()d
assertEquals(4, snapshotCache.size());
assertEquals(4, omMetrics.getNumSnapshotCacheSize());
assertEntryExistence(dbKey1, true);

// Releasing dbKey2 at this point should immediately trigger its eviction
// because the cache size exceeded the soft limit
snapshotCache.release(dbKey2);
assertEquals(3, snapshotCache.size());
assertEquals(3, omMetrics.getNumSnapshotCacheSize());
assertEntryExistence(dbKey2, false);
assertEntryExistence(dbKey1, true);
}
Expand All @@ -252,41 +283,50 @@ void testEviction3WithClose() throws IOException {
try (ReferenceCounted<OmSnapshot> rcOmSnapshot = snapshotCache.get(dbKey1)) {
assertEquals(1L, rcOmSnapshot.getTotalRefCount());
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
}
// ref count should have been decreased because it would be close()d
// upon exiting try-with-resources.
assertEquals(0L, snapshotCache.getDbMap().get(dbKey1).getTotalRefCount());
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey2 = UUID.randomUUID();
try (ReferenceCounted<OmSnapshot> rcOmSnapshot = snapshotCache.get(dbKey2)) {
assertEquals(1L, rcOmSnapshot.getTotalRefCount());
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());
// Get dbKey2 entry a second time
try (ReferenceCounted<OmSnapshot> rcOmSnapshot2 = snapshotCache.get(dbKey2)) {
assertEquals(2L, rcOmSnapshot.getTotalRefCount());
assertEquals(2L, rcOmSnapshot2.getTotalRefCount());
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());
}
assertEquals(1L, rcOmSnapshot.getTotalRefCount());
}
assertEquals(0L, snapshotCache.getDbMap().get(dbKey2).getTotalRefCount());
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey3 = UUID.randomUUID();
try (ReferenceCounted<OmSnapshot> rcOmSnapshot = snapshotCache.get(dbKey3)) {
assertEquals(1L, rcOmSnapshot.getTotalRefCount());
assertEquals(3, snapshotCache.size());
assertEquals(3, omMetrics.getNumSnapshotCacheSize());
}
assertEquals(0L, snapshotCache.getDbMap().get(dbKey3).getTotalRefCount());
assertEquals(3, snapshotCache.size());
assertEquals(3, omMetrics.getNumSnapshotCacheSize());

final UUID dbKey4 = UUID.randomUUID();
try (ReferenceCounted<OmSnapshot> rcOmSnapshot = snapshotCache.get(dbKey4)) {
assertEquals(1L, rcOmSnapshot.getTotalRefCount());
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
}
assertEquals(0L, snapshotCache.getDbMap().get(dbKey4).getTotalRefCount());
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ public void init() throws RocksDBException, IOException, ExecutionException {
omSnapshotManager = mock(OmSnapshotManager.class);
when(ozoneManager.getOmSnapshotManager()).thenReturn(omSnapshotManager);
when(omSnapshotManager.isSnapshotStatus(any(), any())).thenReturn(true);
SnapshotCache snapshotCache = new SnapshotCache(mockCacheLoader(), 10);
SnapshotCache snapshotCache = new SnapshotCache(mockCacheLoader(), 10, omMetrics);

when(omSnapshotManager.getActiveSnapshot(anyString(), anyString(), anyString()))
.thenAnswer(invocationOnMock -> {
Expand Down