From 18977083a9b7870f2456b315c75ff19e44324cdf Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 8 Jan 2021 09:32:07 -0500 Subject: [PATCH] safely handle existing net namespace in default network manager When a client restarts, the network_hook's prerun will call `CreateNetwork`. Drivers that don't implement their own network manager will fall back to the default network manager, which doesn't safely handle the case where the network namespace is being recreated. This results in an error and the task being restarted for `exec` tasks with `network` blocks (this also impacts the community `containerd` and probably other community task drivers). If we get an error when attempting to create the namespace and that error is because the namespace file already exists and is locked by the task process, then we'll return a `nil` error with the `created` flag set to false, just as we do with the `docker` driver. --- CHANGELOG.md | 1 + client/allocrunner/network_manager_linux.go | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b0a810c68fe..b223d2a51729 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ IMPROVEMENTS: BUG FIXES: * client: Fixed a bug where clients configured with `cpu_total_compute` did not update the `cpu.totalcompute` node attribute. [[GH-9532](https://github.com/hashicorp/nomad/issues/9532)] + * client: Fixed a bug where non-`docker` tasks with network isolation were restarted on client restart. 
[[GH-9757](https://github.com/hashicorp/nomad/issues/9757)] * consul: Fixed a bug where updating a task to include services would not work [[GH-9707](https://github.com/hashicorp/nomad/issues/9707)] * consul: Fixed alloc address mode port advertisement to use the mapped `to` port value [[GH-9730](https://github.com/hashicorp/nomad/issues/9730)] * consul/connect: Fixed a bug where absent ingress envoy proxy configuration could panic client [[GH-9669](https://github.com/hashicorp/nomad/issues/9669)] diff --git a/client/allocrunner/network_manager_linux.go b/client/allocrunner/network_manager_linux.go index 63d63a34799f..b5bde8eff61e 100644 --- a/client/allocrunner/network_manager_linux.go +++ b/client/allocrunner/network_manager_linux.go @@ -2,7 +2,10 @@ package allocrunner import ( "fmt" + "os" + "path" "strings" + "syscall" hclog "github.com/hashicorp/go-hclog" clientconfig "github.com/hashicorp/nomad/client/config" @@ -92,6 +95,15 @@ type defaultNetworkManager struct{} func (*defaultNetworkManager) CreateNetwork(allocID string) (*drivers.NetworkIsolationSpec, bool, error) { netns, err := nsutil.NewNS(allocID) if err != nil { + // when a client restarts, the namespace will already exist and + // there will be a namespace file in use by the task process + if e, ok := err.(*os.PathError); ok && e.Err == syscall.EPERM { + nsPath := path.Join(nsutil.NetNSRunDir, allocID) + _, err := os.Stat(nsPath) + if err == nil { + return nil, false, nil + } + } return nil, false, err }