Skip to content

Commit

Permalink
[Bugfix] Fix TieredSpilloverCache stats not adding correctly when sha…
Browse files Browse the repository at this point in the history
…rds are closed (#16560)

* added draft tests for tsc stats holder

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* first draft tsc stats bugfix

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* Complete tests

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* Cleanup

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* Integrate fix with TSC

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* Add IT

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* Refactor cache package names in TSC module to match with server

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* changelog

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* Revert "Refactor cache package names in TSC module to match with server"

This reverts commit 3b15a7a.

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* Addressed Sagar's comments

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* More package fixes

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

* Addressed andross's comments

Signed-off-by: Peter Alfonsi <petealft@amazon.com>

---------

Signed-off-by: Peter Alfonsi <petealft@amazon.com>
Signed-off-by: Peter Alfonsi <peter.alfonsi@gmail.com>
Co-authored-by: Peter Alfonsi <petealft@amazon.com>
(cherry picked from commit c82cd2e)
  • Loading branch information
peteralfonsi authored and Peter Alfonsi committed Dec 5, 2024
1 parent 8f13e69 commit aeb226d
Show file tree
Hide file tree
Showing 7 changed files with 512 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix `doc_values` only (`index:false`) IP field searching for masks ([#16628](https://github.com/opensearch-project/OpenSearch/pull/16628))
- Fix stale cluster state custom file deletion ([#16670](https://github.com/opensearch-project/OpenSearch/pull/16670))
- Bound the size of cache in deprecation logger ([16702](https://github.com/opensearch-project/OpenSearch/issues/16702))
- [Tiered Caching] Fix bug in cache stats API ([#16560](https://github.com/opensearch-project/OpenSearch/pull/16560))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest;
import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.opensearch.action.admin.indices.delete.DeleteIndexRequest;
import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse;
import org.opensearch.action.admin.indices.stats.CommonStatsFlags;
import org.opensearch.action.search.SearchResponse;
Expand Down Expand Up @@ -40,6 +41,7 @@
import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_NAME;
import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_DISK;
import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_ON_HEAP;
import static org.opensearch.indices.IndicesService.INDICES_CACHE_CLEAN_INTERVAL_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse;

Expand Down Expand Up @@ -417,6 +419,55 @@ public void testStatsWithMultipleSegments() throws Exception {
assertTrue(diskCacheStat.getEvictions() == 0);
}

public void testClosingShard() throws Exception {
// Closing the shard should totally remove the stats associated with that shard.
internalCluster().startNodes(
1,
Settings.builder()
.put(defaultSettings(HEAP_CACHE_SIZE_STRING, getNumberOfSegments()))
.put(
TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(),
new TimeValue(0, TimeUnit.SECONDS)
)
.put(INDICES_CACHE_CLEAN_INTERVAL_SETTING.getKey(), new TimeValue(1))
.build()
);
String index = "index";
Client client = client();
startIndex(client, index);

// First search one time to see how big a single value will be
searchIndex(client, index, 0);
// get total stats
long singleSearchSize = getTotalStats(client).getSizeInBytes();
// Select numbers so we get some values on both heap and disk
int itemsOnHeap = HEAP_CACHE_SIZE / (int) singleSearchSize;
int itemsOnDisk = 1 + randomInt(30); // The first one we search (to get the size) always goes to disk
int expectedEntries = itemsOnHeap + itemsOnDisk;

for (int i = 1; i < expectedEntries; i++) {
// Cause misses
searchIndex(client, index, i);
}
int expectedMisses = itemsOnHeap + itemsOnDisk;

// Cause some hits
int expectedHits = randomIntBetween(itemsOnHeap, expectedEntries); // Select it so some hits come from both tiers
for (int i = 0; i < expectedHits; i++) {
searchIndex(client, index, i);
}

// Check the new stats API values are as expected
assertEquals(
new ImmutableCacheStats(expectedHits, expectedMisses, 0, expectedEntries * singleSearchSize, expectedEntries),
getTotalStats(client)
);

// Closing the index should close the shard
assertAcked(client().admin().indices().delete(new DeleteIndexRequest("index")).get());
assertEquals(new ImmutableCacheStats(0, 0, 0, 0, 0), getTotalStats(client));
}

private void startIndex(Client client, String indexName) throws InterruptedException {
assertAcked(
client.admin()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,12 +373,10 @@ private V compute(ICacheKey<K> key, LoadAwareCacheLoader<ICacheKey<K>, V> loader

@Override
public void invalidate(ICacheKey<K> key) {
for (Map.Entry<ICache<K, V>, TierInfo> cacheEntry : caches.entrySet()) {
if (key.getDropStatsForDimensions()) {
List<String> dimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, cacheEntry.getValue().tierName);
statsHolder.removeDimensions(dimensionValues);
}
if (key.key != null) {
if (key.getDropStatsForDimensions()) {
statsHolder.removeDimensions(key.dimensions);
} else if (key.key != null) {
for (Map.Entry<ICache<K, V>, TierInfo> cacheEntry : caches.entrySet()) {
try (ReleasableLock ignore = writeLock.acquire()) {
cacheEntry.getKey().invalidate(key);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ public class TieredSpilloverCacheStatsHolder extends DefaultCacheStatsHolder {
/** Dimension value for on-disk cache, like EhcacheDiskCache. */
public static final String TIER_DIMENSION_VALUE_DISK = "disk";

static final List<String> TIER_VALUES = List.of(TIER_DIMENSION_VALUE_ON_HEAP, TIER_DIMENSION_VALUE_DISK);

/**
* Constructor for the stats holder.
* @param originalDimensionNames the original dimension names, not including TIER_DIMENSION_NAME
Expand Down Expand Up @@ -167,4 +169,17 @@ public void decrementItems(List<String> dimensionValues) {
void setDiskCacheEnabled(boolean diskCacheEnabled) {
this.diskCacheEnabled = diskCacheEnabled;
}

@Override
public void removeDimensions(List<String> dimensionValues) {
assert dimensionValues.size() == dimensionNames.size() - 1
: "Must specify a value for every dimension except tier when removing from StatsHolder";
// As we are removing nodes from the tree, obtain the lock
lock.lock();
try {
removeDimensionsHelper(dimensionValues, statsRoot, 0);
} finally {
lock.unlock();
}
}
}
Loading

0 comments on commit aeb226d

Please sign in to comment.