From d582fab4cc755a94e33c2c9467180dcab76e21b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radu=20Lucu=C8=9B?= Date: Mon, 19 Aug 2024 12:40:40 +0300 Subject: [PATCH] feat: improve caching support - adds support for 'Cache-Control' header - adds support for 'Retry-After' header - fetching is skipped for at least 1 minute after the previous successful check --- cmd/cleed/root_test.go | 237 +++++++++++++++++++++++++++++++++++-- cmd/cleed/unfollow_test.go | 21 ++-- internal/feed.go | 86 ++++++++++++-- internal/storage/cache.go | 34 ++++-- 4 files changed, 337 insertions(+), 41 deletions(-) diff --git a/cmd/cleed/root_test.go b/cmd/cleed/root_test.go index c05ff5a..b7dac03 100644 --- a/cmd/cleed/root_test.go +++ b/cmd/cleed/root_test.go @@ -109,14 +109,16 @@ RSS Feed • Item 1 assert.NoError(t, err) assert.Equal(t, 2, len(cacheInfo)) assert.Equal(t, &_storage.CacheInfoItem{ - URL: server.URL + "/rss", - LastCheck: time.Unix(defaultCurrentTime.Unix(), 0), - ETag: "123", + URL: server.URL + "/rss", + LastCheck: time.Unix(defaultCurrentTime.Unix(), 0), + ETag: "123", + FetchAfter: time.Unix(defaultCurrentTime.Unix()+60, 0), }, cacheInfo[server.URL+"/rss"]) assert.Equal(t, &_storage.CacheInfoItem{ - URL: server.URL + "/atom", - LastCheck: time.Unix(defaultCurrentTime.Unix(), 0), - ETag: "", + URL: server.URL + "/atom", + LastCheck: time.Unix(defaultCurrentTime.Unix(), 0), + ETag: "", + FetchAfter: time.Unix(defaultCurrentTime.Unix()+60, 0), }, cacheInfo[server.URL+"/atom"]) b, err := os.ReadFile(path.Join(cacheDir, "feed_"+url.QueryEscape(server.URL+"/rss"))) @@ -196,7 +198,7 @@ Atom Feed • Item 1 `, out.String()) } -func Test_Feed_Cached(t *testing.T) { +func Test_Feed_NotModified(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() @@ -266,6 +268,227 @@ RSS Feed • Item 1 `, out.String()) } +func Test_Feed_CacheControl(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + timeMock := mocks.NewMockTime(ctrl) + timeMock.EXPECT().Now().Return(defaultCurrentTime).AnyTimes() + + out := new(bytes.Buffer) + printer := internal.NewPrinter(nil, out, out) + storage := _storage.NewLocalStorage("cleed_test", timeMock) + defer localStorageCleanup(t, storage) + + configDir, err := os.UserConfigDir() + if err != nil { + t.Fatal(err) + } + listsDir := path.Join(configDir, "cleed_test", "lists") + err = os.MkdirAll(listsDir, 0700) + if err != nil { + t.Fatal(err) + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Cache-Control", "public, max-age=300") + w.Write([]byte(createDefaultRSS())) + })) + defer server.Close() + + err = os.WriteFile(path.Join(listsDir, "default"), + []byte(fmt.Sprintf("%d %s\n", + defaultCurrentTime.Unix(), server.URL, + ), + ), 0600) + if err != nil { + t.Fatal(err) + } + + feed := internal.NewTerminalFeed(timeMock, printer, storage) + feed.SetAgent("cleed/test") + + root, err := NewRoot("0.1.0", timeMock, printer, storage, feed) + assert.NoError(t, err) + + os.Args = []string{"cleed"} + + err = root.Cmd.Execute() + assert.NoError(t, err) + assert.Equal(t, `RSS Feed • Item 2 +1688 days ago https://rss-feed.com/item-2/ + +RSS Feed • Item 1 +15 minutes ago https://rss-feed.com/item-1/ + +`, out.String()) + + cacheInfo, err := storage.LoadCacheInfo() + assert.NoError(t, err) + assert.Equal(t, 1, len(cacheInfo)) + assert.Equal(t, &_storage.CacheInfoItem{ + URL: server.URL, + LastCheck: time.Unix(defaultCurrentTime.Unix(), 0), + ETag: "", + FetchAfter: time.Unix(defaultCurrentTime.Unix()+300, 0), + }, cacheInfo[server.URL]) +} + +func Test_Feed_RetryAfter(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + timeMock := mocks.NewMockTime(ctrl) + timeMock.EXPECT().Now().Return(defaultCurrentTime).AnyTimes() + + out := new(bytes.Buffer) + printer := internal.NewPrinter(nil, out, out) + storage := _storage.NewLocalStorage("cleed_test", timeMock) + defer localStorageCleanup(t, storage) + + configDir, err := os.UserConfigDir() + if err != nil { + t.Fatal(err) + } + listsDir := path.Join(configDir, "cleed_test", "lists") + err = os.MkdirAll(listsDir, 0700) + if err != nil { + t.Fatal(err) + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Retry-After", "300") + w.WriteHeader(http.StatusTooManyRequests) + })) + defer server.Close() + + err = os.WriteFile(path.Join(listsDir, "default"), + []byte(fmt.Sprintf("%d %s\n", + defaultCurrentTime.Unix(), server.URL, + ), + ), 0600) + if err != nil { + t.Fatal(err) + } + + cacheDir, err := os.UserCacheDir() + if err != nil { + t.Fatal(err) + } + cacheDir = path.Join(cacheDir, "cleed_test") + err = os.MkdirAll(cacheDir, 0700) + if err != nil { + t.Fatal(err) + } + rss := createDefaultRSS() + err = storage.SaveFeedCache(bytes.NewBufferString(rss), server.URL) + if err != nil { + t.Fatal(err) + } + + feed := internal.NewTerminalFeed(timeMock, printer, storage) + feed.SetAgent("cleed/test") + + root, err := NewRoot("0.1.0", timeMock, printer, storage, feed) + assert.NoError(t, err) + + os.Args = []string{"cleed"} + + err = root.Cmd.Execute() + assert.NoError(t, err) + assert.Equal(t, `RSS Feed • Item 2 +1688 days ago https://rss-feed.com/item-2/ + +RSS Feed • Item 1 +15 minutes ago https://rss-feed.com/item-1/ + +`, out.String()) + + cacheInfo, err := storage.LoadCacheInfo() + assert.NoError(t, err) + assert.Equal(t, 1, len(cacheInfo)) + assert.Equal(t, &_storage.CacheInfoItem{ + URL: server.URL, + LastCheck: time.Unix(0, 0), + ETag: "", + FetchAfter: time.Unix(defaultCurrentTime.Unix()+300, 0), + }, cacheInfo[server.URL]) +} + +func Test_Feed_FetchAfter_Load_From_Cache(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + timeMock := mocks.NewMockTime(ctrl) + timeMock.EXPECT().Now().Return(defaultCurrentTime).AnyTimes() + + out := new(bytes.Buffer) + printer := internal.NewPrinter(nil, out, out) + storage := _storage.NewLocalStorage("cleed_test", timeMock) + defer localStorageCleanup(t, storage) + + configDir, err := os.UserConfigDir() + if err != nil { + t.Fatal(err) + } + listsDir := path.Join(configDir, "cleed_test", "lists") + err = os.MkdirAll(listsDir, 0700) + if err != nil { + t.Fatal(err) + } + + err = os.WriteFile(path.Join(listsDir, "default"), + []byte(fmt.Sprintf("%d %s\n", + defaultCurrentTime.Unix(), "https://example.com", + ), + ), 0600) + if err != nil { + t.Fatal(err) + } + + cacheDir, err := os.UserCacheDir() + if err != nil { + t.Fatal(err) + } + cacheDir = path.Join(cacheDir, "cleed_test") + err = os.MkdirAll(cacheDir, 0700) + if err != nil { + t.Fatal(err) + } + rss := createDefaultRSS() + err = storage.SaveFeedCache(bytes.NewBufferString(rss), "https://example.com") + if err != nil { + t.Fatal(err) + } + + storage.SaveCacheInfo(map[string]*_storage.CacheInfoItem{ + "https://example.com": { + URL: "https://example.com", + LastCheck: time.Unix(defaultCurrentTime.Unix(), 0), + ETag: "etag", + FetchAfter: time.Unix(defaultCurrentTime.Unix()+300, 0), + }, + }) + + feed := internal.NewTerminalFeed(timeMock, printer, storage) + feed.SetAgent("cleed/test") + + root, err := NewRoot("0.1.0", timeMock, printer, storage, feed) + assert.NoError(t, err) + + os.Args = []string{"cleed"} + + err = root.Cmd.Execute() + assert.NoError(t, err) + assert.Equal(t, `RSS Feed Item 2 +1688 days ago https://rss-feed.com/item-2/ + +RSS Feed Item 1 +15 minutes ago https://rss-feed.com/item-1/ + +`, out.String()) +} + func Test_Feed_Limit(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() diff --git a/cmd/cleed/unfollow_test.go b/cmd/cleed/unfollow_test.go index 09ffcde..3ad24b8 100644 --- a/cmd/cleed/unfollow_test.go +++ b/cmd/cleed/unfollow_test.go @@ -216,14 +216,16 @@ func Test_Unfollow_Clean_Cache(t *testing.T) { err = storage.SaveCacheInfo(map[string]*_storage.CacheInfoItem{ "https://example.com": { - URL: "https://example.com", - LastCheck: defaultCurrentTime, - ETag: "etag", + URL: "https://example.com", + LastCheck: defaultCurrentTime, + ETag: "etag", + FetchAfter: time.Unix(0, 0), }, "https://test.com": { - URL: "https://test.com", - LastCheck: defaultCurrentTime, - ETag: "etag", + URL: "https://test.com", + LastCheck: defaultCurrentTime, + ETag: "etag", + FetchAfter: time.Unix(0, 0), }, }) if err != nil { @@ -266,9 +268,10 @@ https://test.com was removed from the list } assert.Len(t, cacheInfo, 1) assert.Equal(t, &_storage.CacheInfoItem{ - URL: "https://test.com", - LastCheck: time.Unix(defaultCurrentTime.Unix(), 0), - ETag: "etag", + URL: "https://test.com", + LastCheck: time.Unix(defaultCurrentTime.Unix(), 0), + ETag: "etag", + FetchAfter: time.Unix(0, 0), }, cacheInfo["https://test.com"]) assert.NoFileExists(t, path.Join(cacheDir, "feed_"+url.QueryEscape("https://example.com"))) diff --git a/internal/feed.go b/internal/feed.go index b0f6ae5..1fb3d8d 100644 --- a/internal/feed.go +++ b/internal/feed.go @@ -299,15 +299,16 @@ func (f *TerminalFeed) processFeeds(opts *FeedOptions, config *storage.Config) ( ci := cacheInfo[url] if ci == nil { ci = &storage.CacheInfoItem{ - URL: url, - LastCheck: time.Time{}, + URL: url, + LastCheck: time.Unix(0, 0), + FetchAfter: time.Unix(0, 0), } cacheInfo[url] = ci } wg.Add(1) go func(ci *storage.CacheInfoItem) { defer wg.Done() - changed, etag, err := f.fetchFeed(ci) + res, err := f.fetchFeed(ci) if err != nil { f.printer.ErrPrintf("failed to fetch feed: %s: %v\n", ci.URL, err) return @@ -338,10 +339,13 @@ func (f *TerminalFeed) processFeeds(opts *FeedOptions, config *storage.Config) ( IsNew: feedItem.PublishedParsed.After(ci.LastCheck), }) } - if changed { - ci.ETag = etag + if res.Changed { + ci.ETag = res.ETag ci.LastCheck = f.time.Now() } + if res.FetchAfter.After(ci.FetchAfter) { + ci.FetchAfter = res.FetchAfter + } }(ci) } wg.Wait() @@ -361,12 +365,23 @@ func (f *TerminalFeed) parseFeed(url string) (*gofeed.Feed, error) { return f.parser.Parse(fc) } -func (f *TerminalFeed) fetchFeed(feed *storage.CacheInfoItem) (bool, string, error) { +type FetchResult struct { + Changed bool + ETag string + FetchAfter time.Time +} + +func (f *TerminalFeed) fetchFeed(feed *storage.CacheInfoItem) (*FetchResult, error) { + if feed.FetchAfter.After(f.time.Now()) { + return &FetchResult{ + Changed: false, + }, nil + } ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() req, err := http.NewRequestWithContext(ctx, "GET", feed.URL, nil) if err != nil { - return false, "", utils.NewInternalError(fmt.Sprintf("failed to create request: %v", err)) + return nil, utils.NewInternalError(fmt.Sprintf("failed to create request: %v", err)) } req.Header.Set("User-Agent", f.agent) if feed.ETag != "" { @@ -379,14 +394,24 @@ func (f *TerminalFeed) fetchFeed(feed *storage.CacheInfoItem) (bool, string, err req.Header.Set("Accept-Encoding", "br, gzip") res, err := f.http.Do(req) if err != nil { - return false, "", err + return nil, err } defer res.Body.Close() + if res.StatusCode == http.StatusNotModified { - return false, "", nil + return &FetchResult{ + Changed: false, + FetchAfter: f.time.Now().Add(parseMaxAge(res.Header.Get("Cache-Control"))), + }, nil + } + if res.StatusCode == http.StatusTooManyRequests || res.StatusCode == http.StatusServiceUnavailable { + return &FetchResult{ + Changed: false, + FetchAfter: f.parseRetryAfter(res.Header.Get("Retry-After")), + }, nil } if res.StatusCode != http.StatusOK { - return false, "", fmt.Errorf("unexpected status code: %d", res.StatusCode) + return nil, fmt.Errorf("unexpected status code: %d", res.StatusCode) } var bodyReader io.Reader = res.Body contentEncoding := res.Header.Get("Content-Encoding") @@ -395,11 +420,48 @@ func (f *TerminalFeed) fetchFeed(feed *storage.CacheInfoItem) (bool, string, err } else if contentEncoding == "gzip" { bodyReader, err = gzip.NewReader(res.Body) if err != nil { - return false, "", err + return nil, err } } err = f.storage.SaveFeedCache(bodyReader, feed.URL) - return true, res.Header.Get("ETag"), err + return &FetchResult{ + Changed: true, + ETag: res.Header.Get("ETag"), + FetchAfter: f.time.Now().Add(parseMaxAge(res.Header.Get("Cache-Control"))), + }, err +} + +func (f *TerminalFeed) parseRetryAfter(retryAfter string) time.Time { + if retryAfter == "" { + return f.time.Now().Add(5 * time.Minute) + } + retryAfterSeconds, err := strconv.Atoi(retryAfter) + if err == nil { + return f.time.Now().Add(time.Duration(retryAfterSeconds) * time.Second) + } + retryAfterTime, err := time.Parse(time.RFC1123, retryAfter) + if err == nil { + return retryAfterTime + } + return f.time.Now().Add(5 * time.Minute) +} + +func parseMaxAge(cacheControl string) time.Duration { + if cacheControl == "" { + return 60 * time.Second + } + parts := strings.Split(cacheControl, ",") + for i := range parts { + part := strings.TrimSpace(parts[i]) + if strings.HasPrefix(part, "max-age=") { + seconds, err := strconv.ParseInt(part[8:], 10, 64) + if err == nil { + return time.Duration(seconds) * time.Second + } + break + } + } + return 60 * time.Second } func mapColor(color uint8, config *storage.Config) uint8 { diff --git a/internal/storage/cache.go b/internal/storage/cache.go index e7595dc..4493e18 100644 --- a/internal/storage/cache.go +++ b/internal/storage/cache.go @@ -16,9 +16,10 @@ const ( ) type CacheInfoItem struct { - LastCheck time.Time - ETag string - URL string + LastCheck time.Time + FetchAfter time.Time + ETag string + URL string } func (s *LocalStorage) LoadCacheInfo() (map[string]*CacheInfoItem, error) { @@ -110,7 +111,12 @@ func (s *LocalStorage) RemoveFeedCaches(names []string) error { } func getCacheInfoItemLine(item *CacheInfoItem) []byte { - return []byte(fmt.Sprintf("%s %d %s\n", item.URL, item.LastCheck.Unix(), url.QueryEscape(item.ETag))) + return []byte(fmt.Sprintf("%s %d %s %d\n", + item.URL, + item.LastCheck.Unix(), + url.QueryEscape(item.ETag), + item.FetchAfter.Unix()), + ) } func parseCacheInfoItem(line string) (*CacheInfoItem, error) { @@ -122,16 +128,18 @@ func parseCacheInfoItem(line string) (*CacheInfoItem, error) { if err != nil { return nil, err } - etag := "" - if len(parts) > 2 { - etag, err = url.QueryUnescape(parts[2]) - if err != nil { - return nil, err - } + etag, err := url.QueryUnescape(parts[2]) + if err != nil { + return nil, err + } + var fetchAfter int64 + if len(parts) == 4 { + fetchAfter, _ = strconv.ParseInt(parts[3], 10, 64) } return &CacheInfoItem{ - LastCheck: time.Unix(lastCheck, 0), - ETag: etag, - URL: parts[0], + LastCheck: time.Unix(lastCheck, 0), + ETag: etag, + URL: parts[0], + FetchAfter: time.Unix(fetchAfter, 0), }, nil }