Skip to content

Commit

Permalink
Fix HNC startup issues
Browse files Browse the repository at this point in the history
This essentially reverts kubernetes-retired#1087, which breaks HNC on new clusters that
haven't previously had HNC installed. It fixes the nondeterministic
crashing problem by patching in
kubernetes-sigs/controller-runtime#1155, which
has been applied to controller-runtime 0.6.3 in @adrianludwin's repo.
This is a temporary hack and will be removed when controller-runtime
releases its own fix - likely 0.6.4.

Tested: with the reversion of kubernetes-retired#1087 (main.go), HNC can be installed on a
fresh cluster again but fails to start up ~50% of the time. With the fix
to controller-runtime, it passes on 20/20 startup attempts. Ran e2e
tests and got the same result as without this change (four failures).
  • Loading branch information
adrianludwin committed Sep 21, 2020
1 parent fe2e822 commit 7d6d1f6
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 44 deletions.
43 changes: 22 additions & 21 deletions incubator/hnc/cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,14 +147,33 @@ func main() {

// Make sure certs are generated and valid if webhooks are enabled and internal certs are used.
setupLog.Info("Starting certificate generation")
setupFinished, err := validators.CreateCertsIfNeeded(mgr, novalidation, internalCert)
certsCreated, err := validators.CreateCertsIfNeeded(mgr, novalidation, internalCert)
if err != nil {
setupLog.Error(err, "unable to set up cert rotation")
os.Exit(1)
}

// Register webhooks before manager start to avoid potential race conditions.
// See https://github.com/kubernetes-sigs/controller-runtime/issues/1148.
go startControllers(mgr, certsCreated)

setupLog.Info("Starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
os.Exit(1)
}
}

func startControllers(mgr ctrl.Manager, certsCreated chan struct{}) {
// The controllers won't work until the webhooks are operating, and those won't work until the
// certs are all in place.
setupLog.Info("Waiting for certificate generation to complete")
<-certsCreated

if testLog {
stats.StartLoggingActivity()
}

// Create the central in-memory data structure for HNC, since it needs to be shared among all
// other components.
f := forest.NewForest()

// Create all validating admission controllers.
Expand All @@ -179,24 +198,6 @@ func main() {
os.Exit(1)
}

go startControllers(mgr, f, setupFinished)

setupLog.Info("Starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
os.Exit(1)
}
}

func startControllers(mgr ctrl.Manager, f *forest.Forest, setupFinished chan struct{}) {
setupLog.Info("Waiting for certificate generation to complete")
// Block until the setup finishes.
<-setupFinished

if testLog {
stats.StartLoggingActivity()
}

// Create all reconciling controllers
setupLog.Info("Creating controllers", "maxReconciles", maxReconciles)
removeOldCRDVersion := true
Expand Down
2 changes: 2 additions & 0 deletions incubator/hnc/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ require (
sigs.k8s.io/controller-runtime v0.6.3
sigs.k8s.io/controller-tools v0.2.8
)

replace sigs.k8s.io/controller-runtime => github.com/adrianludwin/controller-runtime v0.6.3-ts-fix
2 changes: 2 additions & 0 deletions incubator/hnc/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbt
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/adrianludwin/controller-runtime v0.6.3-ts-fix h1:UiTkM6oeOHhiG2Bm87PPWCEU54FNVq/IffLnNcx5cRk=
github.com/adrianludwin/controller-runtime v0.6.3-ts-fix/go.mod h1:WlZNXcM0++oyaQt4B7C2lEE5JYRs8vJUzRP4N4JpdAY=
github.com/agnivade/levenshtein v1.0.1/go.mod h1:CURSv5d9Uaml+FovSIICkLbAUZ9S4RqaHDIsdSBg7lM=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM=
Expand Down
3 changes: 2 additions & 1 deletion incubator/hnc/vendor/modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ k8s.io/utils/buffer
k8s.io/utils/integer
k8s.io/utils/pointer
k8s.io/utils/trace
# sigs.k8s.io/controller-runtime v0.6.3
# sigs.k8s.io/controller-runtime v0.6.3 => github.com/adrianludwin/controller-runtime v0.6.3-ts-fix
## explicit
sigs.k8s.io/controller-runtime
sigs.k8s.io/controller-runtime/pkg/builder
Expand Down Expand Up @@ -758,3 +758,4 @@ sigs.k8s.io/kustomize/pkg/types
sigs.k8s.io/structured-merge-diff/v3/value
# sigs.k8s.io/yaml v1.2.0
sigs.k8s.io/yaml
# sigs.k8s.io/controller-runtime => github.com/adrianludwin/controller-runtime v0.6.3-ts-fix

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7d6d1f6

Please sign in to comment.