Skip to content

Commit

Permalink
data download: honor Last-Modified header (#2620)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmetc authored Nov 28, 2023
1 parent 77ce2aa commit b336774
Showing 1 changed file with 58 additions and 3 deletions.
61 changes: 58 additions & 3 deletions pkg/cwhub/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"io"
"net/http"
"os"
"time"

log "github.com/sirupsen/logrus"
"gopkg.in/yaml.v3"
Expand Down Expand Up @@ -51,6 +52,62 @@ func downloadFile(url string, destPath string) error {
return nil
}

// needsUpdate checks if a data file has to be downloaded (or updated).
// if the local file doesn't exist, update.
// if the remote is newer than the local file, update.
// if the remote has no modification date, but local file has been modified > a week ago, update.
func needsUpdate(destPath string, url string) bool {
fileInfo, err := os.Stat(destPath)
switch {
case os.IsNotExist(err):
return true
case err != nil:
log.Errorf("while getting %s: %s", destPath, err)
return true

Check warning on line 66 in pkg/cwhub/dataset.go

View check run for this annotation

Codecov / codecov/patch

pkg/cwhub/dataset.go#L64-L66

Added lines #L64 - L66 were not covered by tests
}

resp, err := hubClient.Head(url)
if err != nil {
log.Errorf("while getting %s: %s", url, err)
// Head failed, Get would likely fail too -> no update
return false
}

Check warning on line 74 in pkg/cwhub/dataset.go

View check run for this annotation

Codecov / codecov/patch

pkg/cwhub/dataset.go#L71-L74

Added lines #L71 - L74 were not covered by tests
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
log.Errorf("bad http code %d for %s", resp.StatusCode, url)
return false
}

Check warning on line 80 in pkg/cwhub/dataset.go

View check run for this annotation

Codecov / codecov/patch

pkg/cwhub/dataset.go#L78-L80

Added lines #L78 - L80 were not covered by tests

// update if local file is older than this
shelfLife := 7 * 24 * time.Hour

lastModify := fileInfo.ModTime()

localIsOld := lastModify.Add(shelfLife).Before(time.Now())

remoteLastModified := resp.Header.Get("Last-Modified")
if remoteLastModified == "" {
if localIsOld {
log.Infof("no last modified date for %s, but local file is older than %s", url, shelfLife)
}
return localIsOld

Check warning on line 94 in pkg/cwhub/dataset.go

View check run for this annotation

Codecov / codecov/patch

pkg/cwhub/dataset.go#L91-L94

Added lines #L91 - L94 were not covered by tests
}

lastAvailable, err := time.Parse(time.RFC1123, remoteLastModified)
if err != nil {
log.Warningf("while parsing last modified date for %s: %s", url, err)
return localIsOld
}

Check warning on line 101 in pkg/cwhub/dataset.go

View check run for this annotation

Codecov / codecov/patch

pkg/cwhub/dataset.go#L99-L101

Added lines #L99 - L101 were not covered by tests

if lastModify.Before(lastAvailable) {
log.Infof("new version available, updating %s", destPath)
return true
}

Check warning on line 106 in pkg/cwhub/dataset.go

View check run for this annotation

Codecov / codecov/patch

pkg/cwhub/dataset.go#L104-L106

Added lines #L104 - L106 were not covered by tests

return false
}

// downloadDataSet downloads all the data files for an item.
func downloadDataSet(dataFolder string, force bool, reader io.Reader) error {
dec := yaml.NewDecoder(reader)
Expand All @@ -72,9 +129,7 @@ func downloadDataSet(dataFolder string, force bool, reader io.Reader) error {
return err
}

if _, err := os.Stat(destPath); os.IsNotExist(err) || force {
log.Infof("downloading data '%s' in '%s'", dataS.SourceURL, destPath)

if force || needsUpdate(destPath, dataS.SourceURL) {
if err := downloadFile(dataS.SourceURL, destPath); err != nil {
return fmt.Errorf("while getting data: %w", err)
}
Expand Down

0 comments on commit b336774

Please sign in to comment.