Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(repo-server): excess git requests, add shared cache lock on revisions (Issue #14725) #17109

Merged
merged 6 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ argocd-repo-server [flags]
--redisdb int Redis database.
--repo-cache-expiration duration Cache expiration for repo state, incl. app lists, app details, manifest generation, revision meta-data (default 24h0m0s)
--revision-cache-expiration duration Cache expiration for cached revision (default 3m0s)
--revision-cache-lock-timeout duration Cache TTL for locks to prevent duplicate requests on revisions, set to 0 to disable (default 10s)
--sentinel stringArray Redis sentinel hostname and port (e.g. argocd-redis-ha-announce-0:6379).
--sentinelmaster string Redis sentinel master group name. (default "master")
--streamed-manifest-max-extracted-size string Maximum size of streamed manifest archives when extracted (default "1G")
Expand Down
1 change: 1 addition & 0 deletions docs/operator-manual/server-commands/argocd-server.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ argocd-server [flags]
--repo-server-timeout-seconds int Repo server RPC call timeout seconds. (default 60)
--request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0")
--revision-cache-expiration duration Cache expiration for cached revision (default 3m0s)
--revision-cache-lock-timeout duration Cache TTL for locks to prevent duplicate requests on revisions, set to 0 to disable (default 10s)
--rootpath string Used if Argo CD is running behind reverse proxy under subpath different from /
--sentinel stringArray Redis sentinel hostname and port (e.g. argocd-redis-ha-announce-0:6379).
--sentinelmaster string Redis sentinel master group name. (default "master")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,12 @@ spec:
name: argocd-cmd-params-cm
key: reposerver.disable.helm.manifest.max.extracted.size
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/core-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21484,6 +21484,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/ha/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23083,6 +23083,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/ha/namespace-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2204,6 +2204,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22129,6 +22129,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/namespace-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
169 changes: 144 additions & 25 deletions reposerver/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ import (
)

var ErrCacheMiss = cacheutil.ErrCacheMiss
var ErrCacheKeyLocked = cacheutil.ErrCacheKeyLocked

type Cache struct {
cache *cacheutil.Cache
repoCacheExpiration time.Duration
revisionCacheExpiration time.Duration
cache *cacheutil.Cache
repoCacheExpiration time.Duration
revisionCacheExpiration time.Duration
revisionCacheLockTimeout time.Duration
}

// ClusterRuntimeInfo holds cluster runtime information
Expand All @@ -39,16 +41,18 @@ type ClusterRuntimeInfo interface {
GetKubeVersion() string
}

func NewCache(cache *cacheutil.Cache, repoCacheExpiration time.Duration, revisionCacheExpiration time.Duration) *Cache {
return &Cache{cache, repoCacheExpiration, revisionCacheExpiration}
func NewCache(cache *cacheutil.Cache, repoCacheExpiration time.Duration, revisionCacheExpiration time.Duration, revisionCacheLockTimeout time.Duration) *Cache {
nromriell marked this conversation as resolved.
Show resolved Hide resolved
return &Cache{cache, repoCacheExpiration, revisionCacheExpiration, revisionCacheLockTimeout}
}

func AddCacheFlagsToCmd(cmd *cobra.Command, opts ...cacheutil.Options) func() (*Cache, error) {
var repoCacheExpiration time.Duration
var revisionCacheExpiration time.Duration
var revisionCacheLockTimeout time.Duration

cmd.Flags().DurationVar(&repoCacheExpiration, "repo-cache-expiration", env.ParseDurationFromEnv("ARGOCD_REPO_CACHE_EXPIRATION", 24*time.Hour, 0, math.MaxInt64), "Cache expiration for repo state, incl. app lists, app details, manifest generation, revision meta-data")
cmd.Flags().DurationVar(&revisionCacheExpiration, "revision-cache-expiration", env.ParseDurationFromEnv("ARGOCD_RECONCILIATION_TIMEOUT", 3*time.Minute, 0, math.MaxInt64), "Cache expiration for cached revision")
cmd.Flags().DurationVar(&revisionCacheLockTimeout, "revision-cache-lock-timeout", env.ParseDurationFromEnv("ARGOCD_REVISION_CACHE_LOCK_TIMEOUT", 10*time.Second, 0, math.MaxInt64), "Cache TTL for locks to prevent duplicate requests on revisions, set to 0 to disable")

repoFactory := cacheutil.AddCacheFlagsToCmd(cmd, opts...)

Expand All @@ -57,7 +61,7 @@ func AddCacheFlagsToCmd(cmd *cobra.Command, opts ...cacheutil.Options) func() (*
if err != nil {
return nil, fmt.Errorf("error adding cache flags to cmd: %w", err)
}
return NewCache(cache, repoCacheExpiration, revisionCacheExpiration), nil
return NewCache(cache, repoCacheExpiration, revisionCacheExpiration, revisionCacheLockTimeout), nil
}
}

Expand Down Expand Up @@ -145,7 +149,12 @@ func (c *Cache) ListApps(repoUrl, revision string) (map[string]string, error) {
}

func (c *Cache) SetApps(repoUrl, revision string, apps map[string]string) error {
return c.cache.SetItem(listApps(repoUrl, revision), apps, c.repoCacheExpiration, apps == nil)
return c.cache.SetItem(
listApps(repoUrl, revision),
apps,
&cacheutil.CacheActionOpts{
Expiration: c.repoCacheExpiration,
Delete: apps == nil})
}

func helmIndexRefsKey(repo string) string {
Expand All @@ -154,7 +163,14 @@ func helmIndexRefsKey(repo string) string {

// SetHelmIndex stores helm repository index.yaml content to cache
func (c *Cache) SetHelmIndex(repo string, indexData []byte) error {
return c.cache.SetItem(helmIndexRefsKey(repo), indexData, c.revisionCacheExpiration, false)
if indexData == nil {
// Logged as warning upstream
return fmt.Errorf("helm index data is nil, skipping cache")
}
return c.cache.SetItem(
helmIndexRefsKey(repo),
indexData,
&cacheutil.CacheActionOpts{Expiration: c.revisionCacheExpiration})
}

// GetHelmIndex retrieves helm repository index.yaml content from cache
Expand All @@ -172,21 +188,99 @@ func (c *Cache) SetGitReferences(repo string, references []*plumbing.Reference)
for i := range references {
input = append(input, references[i].Strings())
}
return c.cache.SetItem(gitRefsKey(repo), input, c.revisionCacheExpiration, false)
return c.cache.SetItem(gitRefsKey(repo), input, &cacheutil.CacheActionOpts{Expiration: c.revisionCacheExpiration})
}

// GetGitReferences retrieves resolved Git repository references from cache
func (c *Cache) GetGitReferences(repo string, references *[]*plumbing.Reference) error {
// Converts raw cache items to plumbing.Reference objects
func GitRefCacheItemToReferences(cacheItem [][2]string) *[]*plumbing.Reference {
var res []*plumbing.Reference
for i := range cacheItem {
// Skip empty data
if cacheItem[i][0] != "" || cacheItem[i][1] != "" {
res = append(res, plumbing.NewReferenceFromStrings(cacheItem[i][0], cacheItem[i][1]))
}
}
return &res
}

// TryLockGitRefCache attempts to lock the key for the Git repository references if the key doesn't exist, returns the value of
// GetGitReferences after calling the SET
func (c *Cache) TryLockGitRefCache(repo string, lockId string, references *[]*plumbing.Reference) (string, error) {
// This try set with DisableOverwrite is important for making sure that only one process is able to claim ownership
// A normal get + set, or just set would cause ownership to go to whoever the last writer was, and during race conditions
// leads to duplicate requests
err := c.cache.SetItem(gitRefsKey(repo), [][2]string{{cacheutil.CacheLockedValue, lockId}}, &cacheutil.CacheActionOpts{
Expiration: c.revisionCacheLockTimeout,
DisableOverwrite: true})
if err != nil {
// Log but ignore this error since we'll want to retry, failing to obtain the lock should not throw an error
log.Errorf("Error attempting to acquire git references cache lock: %v", err)
}
return c.GetGitReferences(repo, references)
}

// Retrieves the cache item for git repo references. Returns foundLockId, error
func (c *Cache) GetGitReferences(repo string, references *[]*plumbing.Reference) (string, error) {
var input [][2]string
if err := c.cache.GetItem(gitRefsKey(repo), &input); err != nil {
return err
err := c.cache.GetItem(gitRefsKey(repo), &input)
valueExists := len(input) > 0 && len(input[0]) > 0
switch {
// Unexpected Error
case err != nil && err != ErrCacheMiss:
log.Errorf("Error attempting to retrieve git references from cache: %v", err)
return "", err
// Value is set
case valueExists && input[0][0] != cacheutil.CacheLockedValue:
*references = *GitRefCacheItemToReferences(input)
return "", nil
// Key is locked
case valueExists:
return input[0][1], nil
// No key or empty key
default:
return "", nil
}
var res []*plumbing.Reference
for i := range input {
res = append(res, plumbing.NewReferenceFromStrings(input[i][0], input[i][1]))
}

// GetOrLockGitReferences retrieves the git references if they exist, otherwise creates a lock and returns so the caller can populate the cache
// Returns isLockOwner, localLockId, error
func (c *Cache) GetOrLockGitReferences(repo string, lockId string, references *[]*plumbing.Reference) (string, error) {
// Value matches the ttl on the lock in TryLockGitRefCache
waitUntil := time.Now().Add(c.revisionCacheLockTimeout)
// Wait only the maximum amount of time configured for the lock
// if the configured time is zero then the for loop will never run and instead act as the owner immediately
for time.Now().Before(waitUntil) {
// Get current cache state
if foundLockId, err := c.GetGitReferences(repo, references); foundLockId == lockId || err != nil || (references != nil && len(*references) > 0) {
return foundLockId, err
}
if foundLockId, err := c.TryLockGitRefCache(repo, lockId, references); foundLockId == lockId || err != nil || (references != nil && len(*references) > 0) {
return foundLockId, err
}
time.Sleep(1 * time.Second)
}
*references = res
return nil
// If configured time is 0 then this is expected
if c.revisionCacheLockTimeout > 0 {
log.Debug("Repository cache was unable to acquire lock or valid data within timeout")
}
// Timeout waiting for lock
return lockId, nil
}

// UnlockGitReferences unlocks the key for the Git repository references if needed
func (c *Cache) UnlockGitReferences(repo string, lockId string) error {
var input [][2]string
var err error
if err = c.cache.GetItem(gitRefsKey(repo), &input); err == nil &&
input != nil &&
len(input) > 0 &&
len(input[0]) > 1 &&
input[0][0] == cacheutil.CacheLockedValue &&
input[0][1] == lockId {
// We have the lock, so remove it
return c.cache.SetItem(gitRefsKey(repo), input, &cacheutil.CacheActionOpts{Delete: true})
}
return err
}

// refSourceCommitSHAs is a list of resolved revisions for each ref source. This allows us to invalidate the cache
Expand Down Expand Up @@ -274,11 +368,19 @@ func (c *Cache) SetManifests(revision string, appSrc *appv1.ApplicationSource, s
res.CacheEntryHash = hash
}

return c.cache.SetItem(manifestCacheKey(revision, appSrc, srcRefs, namespace, trackingMethod, appLabelKey, appName, clusterInfo, refSourceCommitSHAs), res, c.repoCacheExpiration, res == nil)
return c.cache.SetItem(
manifestCacheKey(revision, appSrc, srcRefs, namespace, trackingMethod, appLabelKey, appName, clusterInfo, refSourceCommitSHAs),
res,
&cacheutil.CacheActionOpts{
Expiration: c.repoCacheExpiration,
Delete: res == nil})
}

func (c *Cache) DeleteManifests(revision string, appSrc *appv1.ApplicationSource, srcRefs appv1.RefTargetRevisionMapping, clusterInfo ClusterRuntimeInfo, namespace, trackingMethod, appLabelKey, appName string, refSourceCommitSHAs ResolvedRevisions) error {
return c.cache.SetItem(manifestCacheKey(revision, appSrc, srcRefs, namespace, trackingMethod, appLabelKey, appName, clusterInfo, refSourceCommitSHAs), "", c.repoCacheExpiration, true)
return c.cache.SetItem(
manifestCacheKey(revision, appSrc, srcRefs, namespace, trackingMethod, appLabelKey, appName, clusterInfo, refSourceCommitSHAs),
"",
&cacheutil.CacheActionOpts{Delete: true})
}

func appDetailsCacheKey(revision string, appSrc *appv1.ApplicationSource, srcRefs appv1.RefTargetRevisionMapping, trackingMethod appv1.TrackingMethod, refSourceCommitSHAs ResolvedRevisions) string {
Expand All @@ -293,7 +395,12 @@ func (c *Cache) GetAppDetails(revision string, appSrc *appv1.ApplicationSource,
}

func (c *Cache) SetAppDetails(revision string, appSrc *appv1.ApplicationSource, srcRefs appv1.RefTargetRevisionMapping, res *apiclient.RepoAppDetailsResponse, trackingMethod appv1.TrackingMethod, refSourceCommitSHAs ResolvedRevisions) error {
return c.cache.SetItem(appDetailsCacheKey(revision, appSrc, srcRefs, trackingMethod, refSourceCommitSHAs), res, c.repoCacheExpiration, res == nil)
return c.cache.SetItem(
appDetailsCacheKey(revision, appSrc, srcRefs, trackingMethod, refSourceCommitSHAs),
res,
&cacheutil.CacheActionOpts{
Expiration: c.repoCacheExpiration,
Delete: res == nil})
}

func revisionMetadataKey(repoURL, revision string) string {
Expand All @@ -306,7 +413,10 @@ func (c *Cache) GetRevisionMetadata(repoURL, revision string) (*appv1.RevisionMe
}

func (c *Cache) SetRevisionMetadata(repoURL, revision string, item *appv1.RevisionMetadata) error {
return c.cache.SetItem(revisionMetadataKey(repoURL, revision), item, c.repoCacheExpiration, false)
return c.cache.SetItem(
revisionMetadataKey(repoURL, revision),
item,
&cacheutil.CacheActionOpts{Expiration: c.repoCacheExpiration})
}

func revisionChartDetailsKey(repoURL, chart, revision string) string {
Expand All @@ -319,15 +429,21 @@ func (c *Cache) GetRevisionChartDetails(repoURL, chart, revision string) (*appv1
}

func (c *Cache) SetRevisionChartDetails(repoURL, chart, revision string, item *appv1.ChartDetails) error {
return c.cache.SetItem(revisionChartDetailsKey(repoURL, chart, revision), item, c.repoCacheExpiration, false)
return c.cache.SetItem(
revisionChartDetailsKey(repoURL, chart, revision),
item,
&cacheutil.CacheActionOpts{Expiration: c.repoCacheExpiration})
}

func gitFilesKey(repoURL, revision, pattern string) string {
return fmt.Sprintf("gitfiles|%s|%s|%s", repoURL, revision, pattern)
}

func (c *Cache) SetGitFiles(repoURL, revision, pattern string, files map[string][]byte) error {
return c.cache.SetItem(gitFilesKey(repoURL, revision, pattern), &files, c.repoCacheExpiration, false)
return c.cache.SetItem(
gitFilesKey(repoURL, revision, pattern),
&files,
&cacheutil.CacheActionOpts{Expiration: c.repoCacheExpiration})
}

func (c *Cache) GetGitFiles(repoURL, revision, pattern string) (map[string][]byte, error) {
Expand All @@ -340,7 +456,10 @@ func gitDirectoriesKey(repoURL, revision string) string {
}

func (c *Cache) SetGitDirectories(repoURL, revision string, directories []string) error {
return c.cache.SetItem(gitDirectoriesKey(repoURL, revision), &directories, c.repoCacheExpiration, false)
return c.cache.SetItem(
gitDirectoriesKey(repoURL, revision),
&directories,
&cacheutil.CacheActionOpts{Expiration: c.repoCacheExpiration})
}

func (c *Cache) GetGitDirectories(repoURL, revision string) ([]string, error) {
Expand Down
Loading
Loading