Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bugfix] Fix TieredSpilloverCache stats not adding correctly when shards are closed #16560

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix warnings from SLF4J on startup when repository-s3 is installed ([#16194](https://github.com/opensearch-project/OpenSearch/pull/16194))
- Fix protobuf-java leak through client library dependencies ([#16254](https://github.com/opensearch-project/OpenSearch/pull/16254))
- Fix multi-search with template doesn't return status code ([#16265](https://github.com/opensearch-project/OpenSearch/pull/16265))
- [Tiered Caching] Fix bug in cache stats API ([#16560](https://github.com/opensearch-project/OpenSearch/pull/16560))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest;
import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.opensearch.action.admin.indices.delete.DeleteIndexRequest;
import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse;
import org.opensearch.action.admin.indices.stats.CommonStatsFlags;
import org.opensearch.action.search.SearchResponse;
Expand Down Expand Up @@ -40,6 +41,7 @@
import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_NAME;
import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_DISK;
import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_ON_HEAP;
import static org.opensearch.indices.IndicesService.INDICES_CACHE_CLEAN_INTERVAL_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse;

Expand Down Expand Up @@ -417,6 +419,55 @@ public void testStatsWithMultipleSegments() throws Exception {
assertTrue(diskCacheStat.getEvictions() == 0);
}

public void testClosingShard() throws Exception {
// Closing the shard should totally remove the stats associated with that shard.
internalCluster().startNodes(
1,
Settings.builder()
.put(defaultSettings(HEAP_CACHE_SIZE_STRING, getNumberOfSegments()))
.put(
TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(),
new TimeValue(0, TimeUnit.SECONDS)
)
.put(INDICES_CACHE_CLEAN_INTERVAL_SETTING.getKey(), new TimeValue(1))
.build()
);
String index = "index";
Client client = client();
startIndex(client, index);

// First search one time to see how big a single value will be
searchIndex(client, index, 0);
// get total stats
long singleSearchSize = getTotalStats(client).getSizeInBytes();
// Select numbers so we get some values on both heap and disk
int itemsOnHeap = HEAP_CACHE_SIZE / (int) singleSearchSize;
int itemsOnDisk = 1 + randomInt(30); // The first one we search (to get the size) always goes to disk
int expectedEntries = itemsOnHeap + itemsOnDisk;

for (int i = 1; i < expectedEntries; i++) {
// Cause misses
searchIndex(client, index, i);
}
int expectedMisses = itemsOnHeap + itemsOnDisk;

// Cause some hits
int expectedHits = randomIntBetween(itemsOnHeap, expectedEntries); // Select it so some hits come from both tiers
for (int i = 0; i < expectedHits; i++) {
searchIndex(client, index, i);
}

// Check the new stats API values are as expected
assertEquals(
new ImmutableCacheStats(expectedHits, expectedMisses, 0, expectedEntries * singleSearchSize, expectedEntries),
getTotalStats(client)
);

// Closing the index should close the shard
assertAcked(client().admin().indices().delete(new DeleteIndexRequest("index")).get());
assertEquals(new ImmutableCacheStats(0, 0, 0, 0, 0), getTotalStats(client));
}

private void startIndex(Client client, String indexName) throws InterruptedException {
assertAcked(
client.admin()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,14 +373,14 @@ private V compute(ICacheKey<K> key, LoadAwareCacheLoader<ICacheKey<K>, V> loader

@Override
public void invalidate(ICacheKey<K> key) {
for (Map.Entry<ICache<K, V>, TierInfo> cacheEntry : caches.entrySet()) {
if (key.getDropStatsForDimensions()) {
List<String> dimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, cacheEntry.getValue().tierName);
statsHolder.removeDimensions(dimensionValues);
}
if (key.key != null) {
try (ReleasableLock ignore = writeLock.acquire()) {
cacheEntry.getKey().invalidate(key);
if (key.getDropStatsForDimensions()) {
statsHolder.removeDimensions(key.dimensions);
} else {
for (Map.Entry<ICache<K, V>, TierInfo> cacheEntry : caches.entrySet()) {
if (key.key != null) {
try (ReleasableLock ignore = writeLock.acquire()) {
cacheEntry.getKey().invalidate(key);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ public class TieredSpilloverCacheStatsHolder extends DefaultCacheStatsHolder {
/** Dimension value for on-disk cache, like EhcacheDiskCache. */
public static final String TIER_DIMENSION_VALUE_DISK = "disk";

static final List<String> TIER_VALUES = List.of(TIER_DIMENSION_VALUE_ON_HEAP, TIER_DIMENSION_VALUE_DISK);

/**
* Constructor for the stats holder.
* @param originalDimensionNames the original dimension names, not including TIER_DIMENSION_NAME
Expand All @@ -65,7 +67,7 @@ private static List<String> getDimensionNamesWithTier(List<String> dimensionName
/**
* Add tierValue to the end of a copy of the initial dimension values, so they can appropriately be used in this stats holder.
*/
List<String> getDimensionsWithTierValue(List<String> initialDimensions, String tierValue) {
public List<String> getDimensionsWithTierValue(List<String> initialDimensions, String tierValue) {
List<String> result = new ArrayList<>(initialDimensions);
result.add(tierValue);
return result;
Expand Down Expand Up @@ -164,7 +166,20 @@ public void decrementItems(List<String> dimensionValues) {
super.decrementItems(dimensionValues);
}

void setDiskCacheEnabled(boolean diskCacheEnabled) {
public void setDiskCacheEnabled(boolean diskCacheEnabled) {
this.diskCacheEnabled = diskCacheEnabled;
}

@Override
public void removeDimensions(List<String> dimensionValues) {
assert dimensionValues.size() == dimensionNames.size() - 1
: "Must specify a value for every dimension except tier when removing from StatsHolder";
// As we are removing nodes from the tree, obtain the lock
lock.lock();
try {
removeDimensionsHelper(dimensionValues, statsRoot, 0);
} finally {
lock.unlock();
}
}
}
Loading
Loading