From 9ca81bb7aafc0dedd1390f25dda976a9c345133f Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Tue, 18 Sep 2018 18:28:18 -0700 Subject: [PATCH] installer/pkg/config/libvirt: Caching for libvirt pulls Checking our RHCOS source against ETag [1] / If-None-Match [2] and Last-Modified [3] / If-Modified-Since [4], it seems to support In-None-Match well, but only supports If-Modified-Since for exact matches: $ URL=http://aos-ostree.rhev-ci-vms.eng.rdu2.redhat.com/rhcos/images/cloud/latest/rhcos-qemu.qcow2.gz $ curl -I "${URL}" HTTP/1.1 200 OK Server: nginx/1.8.0 Date: Wed, 19 Sep 2018 04:32:19 GMT Content-Type: application/octet-stream Content-Length: 684934062 Last-Modified: Tue, 18 Sep 2018 20:05:24 GMT Connection: keep-alive ETag: "5ba15a84-28d343ae" Accept-Ranges: bytes $ curl -sIH 'If-None-Match: "5ba15a84-28d343ae"' "${URL}" | head -n1 HTTP/1.1 304 Not Modified $ curl -sIH 'If-Modified-Since: Tue, 18 Sep 2018 20:05:24 GMT' "${URL}" | head -n1 HTTP/1.1 304 Not Modified $ curl -sIH 'If-Modified-Since: Tue, 18 Sep 2015 20:05:24 GMT' "${URL}" | head -n1 HTTP/1.1 200 OK $ curl -sIH 'If-Modified-Since: Tue, 18 Sep 2018 20:05:25 GMT' "${URL}" | grep 'HTTP\|Last-Modified' HTTP/1.1 200 OK Last-Modified: Tue, 18 Sep 2018 20:05:24 GMT That last entry should have 304ed, although the spec has [4]: When used for cache updates, a cache will typically use the value of the cached message's Last-Modified field to generate the field value of If-Modified-Since. This behavior is most interoperable for cases where clocks are poorly synchronized or when the server has chosen to only honor exact timestamp matches (due to a problem with Last-Modified dates that appear to go "back in time" when the origin server's clock is corrected or a representation is restored from an archived backup). However, caches occasionally generate the field value based on other data, such as the Date header field of the cached message or the local clock time that the message was received, particularly when the cached message does not contain a Last-Modified field. ... When used for cache updates, a cache will typically use the value of the cached message's Last-Modified field to generate the field value of If-Modified-Since. This behavior is most interoperable for cases where clocks are poorly synchronized or when the server has chosen to only honor exact timestamp matches (due to a problem with Last-Modified dates that appear to go "back in time" when the origin server's clock is corrected or a representation is restored from an archived backup). However, caches occasionally generate the field value based on other data, such as the Date header field of the cached message or the local clock time that the message was received, particularly when the cached message does not contain a Last-Modified field. So the server is violating the SHOULD by not 304ing dates greater than Last-Modified, but it's not violating a MUST-level requirement. The server requirements around If-None-Match are MUST-level [2], so using it should be more portable. The RFC also seems to prefer clients use If(-None)-Match [4,5]. I'm using gregjones/httpcache for the caching, since that implemenation seems reasonably popular and the repo's been around for a few years. That library uses the belt-and-suspenders approach of setting both If-None-Match (to the cached ETag) and If-Modified-Since (to the cached Last-Modified) [6], so we should be fine. UserCacheDir requires Go 1.11 [7,8,9]. The BUILD.bazel changes were generated with: $ bazel run //:gazelle using: $ bazel version Build label: 0.16.1- (@non-git) Build target: bazel-out/k8-opt/bin/src/main/java/com/google/devtools/build/lib/bazel/BazelServer_deploy.jar Build time: Mon Aug 13 16:42:29 2018 (1534178549) Build timestamp: 1534178549 Build timestamp as int: 1534178549 [1]: https://tools.ietf.org/html/rfc7232#section-2.3 [2]: https://tools.ietf.org/html/rfc7232#section-3.2 [3]: https://tools.ietf.org/html/rfc7232#section-2.2 [4]: https://tools.ietf.org/html/rfc7232#section-3.3 [5]: https://tools.ietf.org/html/rfc7232#section-2.4 [6]: https://github.com/gregjones/httpcache/blob/9cad4c3443a7200dd6400aef47183728de563a38/httpcache.go#L169-L181 [7]: https://github.com/golang/go/issues/22536 [8]: https://github.com/golang/go/commit/816154b06553a4cf8ee7ad089f5e444b37bed43d [9]: https://github.com/golang/go/commit/50bd1c4d4eb4fac8ddeb5f063c099daccfb71b26 --- installer/pkg/config/libvirt/BUILD.bazel | 11 +++- installer/pkg/config/libvirt/cache.go | 68 ++++++++++++++++++++++++ installer/pkg/config/parser.go | 1 + installer/pkg/workflow/BUILD.bazel | 1 - installer/pkg/workflow/init.go | 13 ++++- 5 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 installer/pkg/config/libvirt/cache.go diff --git a/installer/pkg/config/libvirt/BUILD.bazel b/installer/pkg/config/libvirt/BUILD.bazel index 92f6fa3cc90..c7ea48eeb5b 100644 --- a/installer/pkg/config/libvirt/BUILD.bazel +++ b/installer/pkg/config/libvirt/BUILD.bazel @@ -2,8 +2,15 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "go_default_library", - srcs = ["libvirt.go"], + srcs = [ + "cache.go", + "libvirt.go", + ], importpath = "github.com/openshift/installer/installer/pkg/config/libvirt", visibility = ["//visibility:public"], - deps = ["//vendor/github.com/apparentlymart/go-cidr/cidr:go_default_library"], + deps = [ + "//vendor/github.com/apparentlymart/go-cidr/cidr:go_default_library", + "//vendor/github.com/gregjones/httpcache:go_default_library", + "//vendor/github.com/gregjones/httpcache/diskcache:go_default_library", + ], ) diff --git a/installer/pkg/config/libvirt/cache.go b/installer/pkg/config/libvirt/cache.go new file mode 100644 index 00000000000..8a8ca6eb5b5 --- /dev/null +++ b/installer/pkg/config/libvirt/cache.go @@ -0,0 +1,68 @@ +package libvirt + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" + + "github.com/gregjones/httpcache" + "github.com/gregjones/httpcache/diskcache" +) + +// UseCachedImage leaves non-file:// image URIs unalterered. +// Other URIs are retrieved with a local cache at +// $XDG_CACHE_HOME/openshift-install/libvirt [1]. This allows you to +// use the same remote image URI multiple times without needing to +// worry about redundant downloads, although you will want to +// periodically blow away your cache. +// +// [1]: https://standards.freedesktop.org/basedir-spec/basedir-spec-0.7.html +func (libvirt *Libvirt) UseCachedImage() (err error) { + // FIXME: set the default URI here? Leave it elsewhere? + + if strings.HasPrefix(libvirt.Image, "file://") { + return nil + } + + baseCacheDir, err := os.UserCacheDir() + if err != nil { + return err + } + + cacheDir := filepath.Join(baseCacheDir, "openshift-install", "libvirt") + err = os.MkdirAll(cacheDir, 0777) + if err != nil { + return err + } + + cache := diskcache.New(cacheDir) + transport := httpcache.NewTransport(cache) + resp, err := transport.Client().Get(libvirt.Image) + if err != nil { + return err + } + if resp.StatusCode != 200 { + return fmt.Errorf("%s while getting %s", resp.Status, libvirt.Image) + } + defer resp.Body.Close() + + // FIXME: diskcache's diskv backend doesn't expose direct file access. + // We can write our own cache implementation to get around this, + // but for now just dump this into /tmp without cleanup. + file, err := ioutil.TempFile("", "openshift-install-") + if err != nil { + return err + } + defer file.Close() + + _, err = io.Copy(file, resp.Body) + if err != nil { + return err + } + + libvirt.Image = fmt.Sprintf("file://%s", filepath.ToSlash(file.Name())) + return nil +} diff --git a/installer/pkg/config/parser.go b/installer/pkg/config/parser.go index 0114babe315..2e316649cba 100644 --- a/installer/pkg/config/parser.go +++ b/installer/pkg/config/parser.go @@ -29,6 +29,7 @@ func ParseConfig(data []byte) (*Cluster, error) { return nil, err } cluster.PullSecret = string(data) + cluster.PullSecretPath = "" } if cluster.EC2AMIOverride == "" { diff --git a/installer/pkg/workflow/BUILD.bazel b/installer/pkg/workflow/BUILD.bazel index 6fe008d7c0e..4dcf2b2077d 100644 --- a/installer/pkg/workflow/BUILD.bazel +++ b/installer/pkg/workflow/BUILD.bazel @@ -17,7 +17,6 @@ go_library( deps = [ "//installer/pkg/config:go_default_library", "//installer/pkg/config-generator:go_default_library", - "//installer/pkg/copy:go_default_library", "//vendor/github.com/Sirupsen/logrus:go_default_library", "//vendor/gopkg.in/yaml.v2:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", diff --git a/installer/pkg/workflow/init.go b/installer/pkg/workflow/init.go index d79a2aa58ac..392ee8eee30 100644 --- a/installer/pkg/workflow/init.go +++ b/installer/pkg/workflow/init.go @@ -11,7 +11,6 @@ import ( "github.com/openshift/installer/installer/pkg/config" configgenerator "github.com/openshift/installer/installer/pkg/config-generator" - "github.com/openshift/installer/installer/pkg/copy" ) const ( @@ -94,6 +93,12 @@ func prepareWorspaceStep(m *metadata) error { return err } + if cluster.Platform == config.PlatformLibvirt { + if err := cluster.Libvirt.UseCachedImage(); err != nil { + return err + } + } + // generate clusterDir folder clusterDir := filepath.Join(dir, cluster.Name) m.clusterDir = clusterDir @@ -107,7 +112,11 @@ func prepareWorspaceStep(m *metadata) error { // put config file under the clusterDir folder configFilePath := filepath.Join(clusterDir, configFileName) - if err := copy.Copy(m.configFilePath, configFilePath); err != nil { + configContent, err := yaml.Marshal(cluster) + if err != nil { + return err + } + if err := ioutil.WriteFile(configFilePath, configContent, 0666); err != nil { return fmt.Errorf("failed to create cluster config at %q: %v", clusterDir, err) }