From 5e75ff7ddda5ea47d9b74c7baa84b0ddd965ced8 Mon Sep 17 00:00:00 2001 From: naveenaechan Date: Wed, 24 Jan 2024 01:11:40 +0530 Subject: [PATCH] Fail open handling for redis cache --- docs/configs/janusgraph-cfg.md | 1 + janusgraph-all/pom.xml | 2 +- janusgraph-backend-testutils/pom.xml | 2 +- janusgraph-berkeleyje/pom.xml | 2 +- janusgraph-bigtable/pom.xml | 2 +- janusgraph-core/pom.xml | 2 +- .../cache/ExpirationKCVSRedisCache.java | 75 ++++++++++++------- .../diskstorage/util/CacheMetricsAction.java | 2 +- .../GraphDatabaseConfiguration.java | 5 ++ janusgraph-cql/pom.xml | 2 +- janusgraph-dist/pom.xml | 2 +- janusgraph-doc/pom.xml | 2 +- janusgraph-driver/pom.xml | 2 +- janusgraph-es/pom.xml | 2 +- .../example-berkeleyje/pom.xml | 2 +- janusgraph-examples/example-common/pom.xml | 2 +- janusgraph-examples/example-cql/pom.xml | 2 +- janusgraph-examples/example-hbase/pom.xml | 2 +- .../example-remotegraph/pom.xml | 2 +- .../example-tinkergraph/pom.xml | 2 +- janusgraph-examples/pom.xml | 2 +- janusgraph-grpc/pom.xml | 2 +- janusgraph-hadoop/pom.xml | 2 +- janusgraph-hbase/pom.xml | 2 +- janusgraph-inmemory/pom.xml | 2 +- janusgraph-lucene/pom.xml | 2 +- janusgraph-server/pom.xml | 2 +- janusgraph-solr/pom.xml | 2 +- janusgraph-test/pom.xml | 2 +- pom.xml | 2 +- 30 files changed, 82 insertions(+), 53 deletions(-) diff --git a/docs/configs/janusgraph-cfg.md b/docs/configs/janusgraph-cfg.md index c7b2d34f43..dfda4f37bd 100644 --- a/docs/configs/janusgraph-cfg.md +++ b/docs/configs/janusgraph-cfg.md @@ -19,6 +19,7 @@ Configuration options that modify JanusGraph's caching behavior | Name | Description | Datatype | Default Value | Mutability | | ---- | ---- | ---- | ---- | ---- | | cache.cache-keyspace-prefix | Set prefix for keyspace created in redis. | String | janusgraph | MASKABLE | +| cache.cache-max-fail-open-count | Max queries to by-pass redis during redis fail over. If it's set to 1000, after 1000 backend queries an attempt is made to query redis again. 
This is to avoid waiting for every query during redis cluster failure. | Integer | 1000 | MASKABLE | | cache.cache-type | Enable or disable Redis cache (redis/inmemory) | String | inmemory | MASKABLE | | cache.db-cache | Whether to enable JanusGraph's database-level cache, which is shared across all transactions. Enabling this option speeds up traversals by holding hot graph elements in memory, but also increases the likelihood of reading stale data. Disabling it forces each transaction to independently fetch graph elements from storage before reading/writing them. | Boolean | false | MASKABLE | | cache.db-cache-clean-wait | How long, in milliseconds, database-level cache will keep entries after flushing them. This option is only useful on distributed storage backends that are capable of acknowledging writes without necessarily making them immediately visible. | Integer | 50 | GLOBAL_OFFLINE | diff --git a/janusgraph-all/pom.xml b/janusgraph-all/pom.xml index 6649c27ac1..43deeacaa6 100644 --- a/janusgraph-all/pom.xml +++ b/janusgraph-all/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-all diff --git a/janusgraph-backend-testutils/pom.xml b/janusgraph-backend-testutils/pom.xml index 6957f40289..989606fee7 100644 --- a/janusgraph-backend-testutils/pom.xml +++ b/janusgraph-backend-testutils/pom.xml @@ -4,7 +4,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-backend-testutils diff --git a/janusgraph-berkeleyje/pom.xml b/janusgraph-berkeleyje/pom.xml index 7f5b856a8a..80040681be 100644 --- a/janusgraph-berkeleyje/pom.xml +++ b/janusgraph-berkeleyje/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-berkeleyje diff --git a/janusgraph-bigtable/pom.xml b/janusgraph-bigtable/pom.xml index f90a7ebaf9..5a09824730 100644 --- a/janusgraph-bigtable/pom.xml +++ b/janusgraph-bigtable/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml 
janusgraph-bigtable diff --git a/janusgraph-core/pom.xml b/janusgraph-core/pom.xml index c6684f706d..0f5f63e645 100644 --- a/janusgraph-core/pom.xml +++ b/janusgraph-core/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-core diff --git a/janusgraph-core/src/main/java/org/janusgraph/diskstorage/keycolumnvalue/cache/ExpirationKCVSRedisCache.java b/janusgraph-core/src/main/java/org/janusgraph/diskstorage/keycolumnvalue/cache/ExpirationKCVSRedisCache.java index b292cd76ee..0e62c86a8e 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/diskstorage/keycolumnvalue/cache/ExpirationKCVSRedisCache.java +++ b/janusgraph-core/src/main/java/org/janusgraph/diskstorage/keycolumnvalue/cache/ExpirationKCVSRedisCache.java @@ -15,7 +15,6 @@ package org.janusgraph.diskstorage.keycolumnvalue.cache; import com.google.common.base.Preconditions; -import com.google.common.cache.CacheLoader; import org.janusgraph.core.JanusGraphException; import org.janusgraph.diskstorage.BackendException; import org.janusgraph.diskstorage.EntryList; @@ -31,6 +30,8 @@ import org.redisson.api.RLock; import org.redisson.api.RMapCache; import org.redisson.api.RedissonClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.HashMap; @@ -38,10 +39,10 @@ import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; -import java.util.logging.Level; -import java.util.logging.Logger; +import java.util.concurrent.atomic.AtomicInteger; import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.CACHE_KEYSPACE_PREFIX; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.MAX_CACHE_FAIL_OPEN_COUNT; import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.REDIS_CACHE_LOCK_LEASE_MS; import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.REDIS_CACHE_LOCK_WAIT_MS; import static 
org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.REDIS_MAX_CACHE_SIZE; @@ -52,11 +53,13 @@ public class ExpirationKCVSRedisCache extends KCVSCache { public static final String REDIS_INDEX_CACHE_PREFIX = "index"; + private final AtomicInteger CACHE_FAIL_OPEN_COUNT = new AtomicInteger(0); + private static int CACHE_MAX_FAIL_OPEN_COUNT = 100; private final long cacheTimeMS; private RedissonClient redissonClient; private RMapCache redisCache; private RMapCache> redisIndexKeys; - private static Logger logger = Logger.getLogger("janusgraph-redis-logger"); + private static final Logger log = LoggerFactory.getLogger("janusgraph-redis-logger"); private static FSTConfiguration fastConf = FSTConfiguration.createDefaultConfiguration(); private Configuration configuration; @@ -68,39 +71,46 @@ public ExpirationKCVSRedisCache(final KeyColumnValueStore store, String metricsN Preconditions.checkArgument(System.currentTimeMillis() + 1000L * 3600 * 24 * 365 * 100 + cacheTimeMS > 0, "Cache expiration time too large, overflow may occur: %s", cacheTimeMS); this.cacheTimeMS = cacheTimeMS; Preconditions.checkArgument(invalidationGracePeriodMS >= 0, "Invalid expiration grace period: %s", invalidationGracePeriodMS); - + CACHE_MAX_FAIL_OPEN_COUNT = configuration.get(MAX_CACHE_FAIL_OPEN_COUNT); redissonClient = RedissonCache.getRedissonClient(configuration); redisCache = redissonClient.getMapCache(String.join("-", configuration.get(CACHE_KEYSPACE_PREFIX), metricsName)); redisIndexKeys = redissonClient.getMapCache(String.join("-", configuration.get(CACHE_KEYSPACE_PREFIX), REDIS_INDEX_CACHE_PREFIX, metricsName)); redisCache.setMaxSize(configuration.get(REDIS_MAX_CACHE_SIZE), EvictionMode.LFU); redisIndexKeys.setMaxSize(configuration.get(REDIS_MAX_CACHE_SIZE), EvictionMode.LFU); - logger.info("********************** Configurations are loaded **********************"); + log.info("********************** Cache configurations are loaded **********************"); } @Override public 
EntryList getSlice(final KeySliceQuery query, final StoreTransaction txh) throws BackendException { - incActionBy(1, CacheMetricsAction.RETRIEVAL, txh); - try { - return get(query, () -> { - incActionBy(1, CacheMetricsAction.MISS, txh); - return store.getSlice(query, unwrapTx(txh)); - }); - } catch (Exception e) { - if (e instanceof JanusGraphException) throw (JanusGraphException) e; - else if (e.getCause() instanceof JanusGraphException) throw (JanusGraphException) e.getCause(); - else throw new JanusGraphException(e); + if (CACHE_FAIL_OPEN_COUNT.get() <= 0) { + try { + return get(query, txh, () -> { + incActionBy(1, CacheMetricsAction.MISS, txh); + return store.getSlice(query, unwrapTx(txh)); + }); + } catch (Exception e) { + if (e instanceof JanusGraphException) { + throw new JanusGraphException(e); + } else { + CACHE_FAIL_OPEN_COUNT.set(CACHE_MAX_FAIL_OPEN_COUNT); + incActionBy(1, CacheMetricsAction.ERROR, txh); + log.warn("Exception occurred while fetching data from cache. query:{}", query.getKey(), e); + } + } } + /* fallback to backend store on cache failure until fail open */ + CACHE_FAIL_OPEN_COUNT.decrementAndGet(); + return store.getSlice(query, unwrapTx(txh)); } - private EntryList get(KeySliceQuery query, Callable valueLoader) throws Exception { + private EntryList get(KeySliceQuery query, final StoreTransaction txh, Callable valueLoader) throws Exception { byte[] bytQuery = redisCache.get(query); EntryList entries = bytQuery != null ? 
(EntryList) fastConf.asObject(bytQuery) : null; if (entries == null) { - logger.log(Level.INFO, "Reading from the store................."); try { entries = valueLoader.call(); if (entries == null) { - throw new CacheLoader.InvalidCacheLoadException("valueLoader must not return null, key=" + query); + throw new JanusGraphException("No value returned from backend data store to cache., key=" + query); } else { redisCache.fastPutAsync(query, fastConf.asByteArray(entries), this.cacheTimeMS, TimeUnit.MILLISECONDS); RLock lock = redisIndexKeys.getLock(query.getKey()); @@ -112,16 +122,20 @@ private EntryList get(KeySliceQuery query, Callable valueLoader) thro queryList.add(query); redisIndexKeys.fastPutAsync(query.getKey(), queryList, this.cacheTimeMS, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { - e.printStackTrace(); + log.warn("Exception during cache update operations", e); throw e; } finally { lock.unlock(); + redisIndexKeys.removeAsync(query.getKey()); } } } catch (Exception e) { - e.printStackTrace(); + log.warn("Exception during get and set cache operations", e); throw e; } + } else{ + CACHE_FAIL_OPEN_COUNT.set(0); + incActionBy(1, CacheMetricsAction.RETRIEVAL, txh); } return entries; } @@ -154,7 +168,7 @@ public Map getSlice(final List keys, fina if (subresult != null) { results.put(key, subresult); if (ksqs[i] != null) { - logger.info("adding to cache subresult " + subresult); + log.info("adding to cache subresult:{}", subresult); redisCache.fastPutAsync(ksqs[i], fastConf.asByteArray(subresult), this.cacheTimeMS, TimeUnit.MILLISECONDS); RLock lock = redisIndexKeys.getLock(ksqs[i].getKey()); try { @@ -184,13 +198,22 @@ public void clearCache() { @Override public void invalidate(StaticBuffer key, List entries) { + if (CACHE_FAIL_OPEN_COUNT.get() > 0) { + CACHE_FAIL_OPEN_COUNT.decrementAndGet(); + return; + } List keySliceQueryList = redisIndexKeys.get(key); - if (keySliceQueryList != null) { - for (KeySliceQuery keySliceQuery : keySliceQueryList) { - 
if (key.equals(keySliceQuery.getKey())) { - redisCache.fastRemoveAsync(keySliceQuery); + try { + if (keySliceQueryList != null) { + for (KeySliceQuery keySliceQuery : keySliceQueryList) { + if (key.equals(keySliceQuery.getKey())) { + redisCache.fastRemoveAsync(keySliceQuery); + } } } + } catch (Exception e) { + CACHE_FAIL_OPEN_COUNT.set(CACHE_MAX_FAIL_OPEN_COUNT); + log.warn("Error occurred while invalidating cache for key:{}", key, e); } } diff --git a/janusgraph-core/src/main/java/org/janusgraph/diskstorage/util/CacheMetricsAction.java b/janusgraph-core/src/main/java/org/janusgraph/diskstorage/util/CacheMetricsAction.java index 32e801c6ae..e4e88be213 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/diskstorage/util/CacheMetricsAction.java +++ b/janusgraph-core/src/main/java/org/janusgraph/diskstorage/util/CacheMetricsAction.java @@ -19,7 +19,7 @@ */ public enum CacheMetricsAction { - RETRIEVAL("retrievals"), MISS("misses"), EXPIRE("expire"); + RETRIEVAL("retrievals"), MISS("misses"), EXPIRE("expire"), ERROR("error"); private final String name; diff --git a/janusgraph-core/src/main/java/org/janusgraph/graphdb/configuration/GraphDatabaseConfiguration.java b/janusgraph-core/src/main/java/org/janusgraph/graphdb/configuration/GraphDatabaseConfiguration.java index be7039dd00..24733dc620 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/graphdb/configuration/GraphDatabaseConfiguration.java +++ b/janusgraph-core/src/main/java/org/janusgraph/graphdb/configuration/GraphDatabaseConfiguration.java @@ -370,6 +370,11 @@ public boolean apply(@Nullable String s) { "triggers eviction when set to 0).", ConfigOption.Type.MASKABLE, 120000L); + public static final ConfigOption MAX_CACHE_FAIL_OPEN_COUNT = new ConfigOption<>(CACHE_NS,"cache-max-fail-open-count", + "Max queries to by-pass redis during redis fail over. If its set to 1000, after 1000 backend queries an attempt is made to" + + "query redis again. 
This is to avoid waiting for every query during redis cluster failure.", + ConfigOption.Type.MASKABLE, 1000); + public static final ConfigOption REDIS_CACHE_SENTINEL_URLS = new ConfigOption<>(CACHE_NS,"redis-cache-sentinel-urls", "csv values for multiple redis sentinel host:port urls.", ConfigOption.Type.MASKABLE, "localhost:26379"); diff --git a/janusgraph-cql/pom.xml b/janusgraph-cql/pom.xml index 211fbc2c26..94c9220697 100644 --- a/janusgraph-cql/pom.xml +++ b/janusgraph-cql/pom.xml @@ -4,7 +4,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 janusgraph-cql diff --git a/janusgraph-dist/pom.xml b/janusgraph-dist/pom.xml index 67bad8b740..1728a822c3 100644 --- a/janusgraph-dist/pom.xml +++ b/janusgraph-dist/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml pom diff --git a/janusgraph-doc/pom.xml b/janusgraph-doc/pom.xml index e0d67a4fe3..85d02f2160 100644 --- a/janusgraph-doc/pom.xml +++ b/janusgraph-doc/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml pom diff --git a/janusgraph-driver/pom.xml b/janusgraph-driver/pom.xml index ce7d03f83f..6294e4be9b 100644 --- a/janusgraph-driver/pom.xml +++ b/janusgraph-driver/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-driver diff --git a/janusgraph-es/pom.xml b/janusgraph-es/pom.xml index 6e5772304b..3c87b120f1 100644 --- a/janusgraph-es/pom.xml +++ b/janusgraph-es/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-es diff --git a/janusgraph-examples/example-berkeleyje/pom.xml b/janusgraph-examples/example-berkeleyje/pom.xml index 43bb887ba8..ede4ce5394 100644 --- a/janusgraph-examples/example-berkeleyje/pom.xml +++ b/janusgraph-examples/example-berkeleyje/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph-examples - 0.6.03 + 0.6.04 ../pom.xml example-berkeleyje diff --git a/janusgraph-examples/example-common/pom.xml b/janusgraph-examples/example-common/pom.xml index 
28d27bf1e0..49cb6876fd 100644 --- a/janusgraph-examples/example-common/pom.xml +++ b/janusgraph-examples/example-common/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph-examples - 0.6.03 + 0.6.04 ../pom.xml example-common diff --git a/janusgraph-examples/example-cql/pom.xml b/janusgraph-examples/example-cql/pom.xml index 477469e77c..516b2a38cf 100644 --- a/janusgraph-examples/example-cql/pom.xml +++ b/janusgraph-examples/example-cql/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph-examples - 0.6.03 + 0.6.04 ../pom.xml example-cql diff --git a/janusgraph-examples/example-hbase/pom.xml b/janusgraph-examples/example-hbase/pom.xml index e1018a59e4..cc4278d2d0 100644 --- a/janusgraph-examples/example-hbase/pom.xml +++ b/janusgraph-examples/example-hbase/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph-examples - 0.6.03 + 0.6.04 ../pom.xml example-hbase diff --git a/janusgraph-examples/example-remotegraph/pom.xml b/janusgraph-examples/example-remotegraph/pom.xml index 8e6916f938..ebb43acc5e 100644 --- a/janusgraph-examples/example-remotegraph/pom.xml +++ b/janusgraph-examples/example-remotegraph/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph-examples - 0.6.03 + 0.6.04 ../pom.xml example-remotegraph diff --git a/janusgraph-examples/example-tinkergraph/pom.xml b/janusgraph-examples/example-tinkergraph/pom.xml index f036029b33..09e7fb798b 100644 --- a/janusgraph-examples/example-tinkergraph/pom.xml +++ b/janusgraph-examples/example-tinkergraph/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph-examples - 0.6.03 + 0.6.04 ../pom.xml example-tinkergraph diff --git a/janusgraph-examples/pom.xml b/janusgraph-examples/pom.xml index 2eae8f5947..9fe75aa738 100644 --- a/janusgraph-examples/pom.xml +++ b/janusgraph-examples/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-examples diff --git a/janusgraph-grpc/pom.xml b/janusgraph-grpc/pom.xml index dfc5cebb00..2cb7b4b302 100644 --- a/janusgraph-grpc/pom.xml +++ b/janusgraph-grpc/pom.xml 
@@ -4,7 +4,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 janusgraph-grpc JanusGraph-gRPC: gRPC Components for JanusGraph diff --git a/janusgraph-hadoop/pom.xml b/janusgraph-hadoop/pom.xml index 3d525bd4b6..f20ab84676 100644 --- a/janusgraph-hadoop/pom.xml +++ b/janusgraph-hadoop/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-hadoop diff --git a/janusgraph-hbase/pom.xml b/janusgraph-hbase/pom.xml index 6a9a0a9c98..3e8dc3433f 100644 --- a/janusgraph-hbase/pom.xml +++ b/janusgraph-hbase/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 janusgraph-hbase JanusGraph-HBase: Distributed Graph Database diff --git a/janusgraph-inmemory/pom.xml b/janusgraph-inmemory/pom.xml index 560a7fda5e..bd197c4cbd 100644 --- a/janusgraph-inmemory/pom.xml +++ b/janusgraph-inmemory/pom.xml @@ -4,7 +4,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-inmemory diff --git a/janusgraph-lucene/pom.xml b/janusgraph-lucene/pom.xml index 40c9f385a8..c2c6cd65fd 100644 --- a/janusgraph-lucene/pom.xml +++ b/janusgraph-lucene/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-lucene diff --git a/janusgraph-server/pom.xml b/janusgraph-server/pom.xml index 06f122ed05..57861fe2e4 100644 --- a/janusgraph-server/pom.xml +++ b/janusgraph-server/pom.xml @@ -4,7 +4,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 janusgraph-server JanusGraph-Server: Server Components for JanusGraph diff --git a/janusgraph-solr/pom.xml b/janusgraph-solr/pom.xml index a4b5f89b8a..744354438e 100644 --- a/janusgraph-solr/pom.xml +++ b/janusgraph-solr/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-solr diff --git a/janusgraph-test/pom.xml b/janusgraph-test/pom.xml index 09e853b801..f208372dc2 100644 --- a/janusgraph-test/pom.xml +++ b/janusgraph-test/pom.xml @@ -3,7 +3,7 @@ org.janusgraph janusgraph - 0.6.03 + 0.6.04 ../pom.xml janusgraph-test diff --git a/pom.xml 
b/pom.xml index e3ef6d53a9..aebee58b69 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ 4.0.0 org.janusgraph janusgraph - 0.6.03 + 0.6.04 pom 3.0.0