Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix that felix didn't restart when config was deleted. #1645

Merged
merged 1 commit into from
Dec 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion felix.go
Original file line number Diff line number Diff line change
Expand Up @@ -964,7 +964,8 @@ func (fc *DataplaneConnector) sendMessagesToDataplaneDriver() {
}
for kOld, vOld := range config {
logCxt := log.WithFields(log.Fields{"key": kOld, "old": vOld, "updateType": "delete"})
if _, prs := config[kOld]; prs {
if _, prs := msg.Config[kOld]; prs {
// Key was present in the message so we've handled above.
continue
}
if handledConfigChanges.Contains(kOld) {
Expand Down
62 changes: 43 additions & 19 deletions fv/config_update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,32 +35,35 @@ import (
var _ = Context("Config update tests, after starting felix", func() {

var (
etcd *containers.Container
felix *containers.Container
felixInitialPID int
client client.Interface
w [3]*workload.Workload
etcd *containers.Container
felix *containers.Container
felixPID int
client client.Interface
w [3]*workload.Workload
)

getFelixPIDs := func() []int {
return felix.GetPIDs("calico-felix")
}

BeforeEach(func() {
felix, etcd, client = containers.StartSingleNodeEtcdTopology()

updateFelixPID := func() {
// Get Felix's PID. This retry loop ensures that we don't get tripped up if we see multiple
// PIDs, which can happen transiently when Felix forks off a subprocess.
// PIDs, which can happen transiently when Felix restarts/forks off a subprocess.
start := time.Now()
for {
pids := getFelixPIDs()
Expect(pids).NotTo(BeEmpty())
if len(pids) == 1 {
felixInitialPID = pids[0]
felixPID = pids[0]
break
}
Expect(time.Since(start)).To(BeNumerically("<", time.Second))
time.Sleep(50 * time.Millisecond)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Logic here doesn't feel quite right to me. Shouldn't it be 'wait until there's 1 Felix PID that is stable for 0.5s' ? As it stands, I think the code can be fooled by seeing a PID of a Felix on the verge of restarting.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's OK, I think: we want to get the old PID, then the tests use that to see whether felix restarted or not.

When we call this update method in the new test, it's after waiting for the old PID to disappear.

}
}

BeforeEach(func() {
felix, etcd, client = containers.StartSingleNodeEtcdTopology()
updateFelixPID()
})

AfterEach(func() {
Expand All @@ -85,10 +88,10 @@ var _ = Context("Config update tests, after starting felix", func() {
// Felix has a 2s timer before it restarts so need to monitor for > 2s.
// We use ContainElement because Felix regularly forks off subprocesses and those
// subprocesses initially have name "calico-felix".
Consistently(getFelixPIDs, "3s", "200ms").Should(ContainElement(felixInitialPID))
Consistently(getFelixPIDs, "3s", "200ms").Should(ContainElement(felixPID))
// We know the initial PID has continued to be active, check that none of the extra
// transientPIDs we see are long-lived.
Eventually(getFelixPIDs).Should(ConsistOf(felixInitialPID))
Eventually(getFelixPIDs).Should(ConsistOf(felixPID))
}

It("should stay up >2s", shouldStayUp)
Expand Down Expand Up @@ -132,23 +135,44 @@ var _ = Context("Config update tests, after starting felix", func() {
It("should stay up >2s", shouldStayUp)
})

shouldExitAfterADelay := func() {
Consistently(getFelixPIDs, "1s", "100ms").Should(ContainElement(felixPID))
Eventually(getFelixPIDs, "5s", "100ms").ShouldNot(ContainElement(felixPID))
}

Context("after updating config that should trigger a restart", func() {
var config *api.FelixConfiguration

BeforeEach(func() {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()

config := api.NewFelixConfiguration()
config = api.NewFelixConfiguration()
config.Name = "default"
config.Spec.InterfacePrefix = "foobarbaz"

_, err := client.FelixConfigurations().Create(ctx, config, options.SetOptions{})
var err error
config, err = client.FelixConfigurations().Create(ctx, config, options.SetOptions{})
Expect(err).NotTo(HaveOccurred())
})

It("should exit after a delay", func() {
// Felix has a 2s timer before it restarts so we should see the same PID for a while.
Consistently(getFelixPIDs, "1s", "100ms").Should(ContainElement(felixInitialPID))
Eventually(getFelixPIDs, "5s", "100ms").ShouldNot(ContainElement(felixInitialPID))
It("should exit after a delay", shouldExitAfterADelay)

Context("after deleting config that should trigger a restart", func() {
BeforeEach(func() {
// Wait for the add to register and cause a restart.
Eventually(getFelixPIDs, "5s", "100ms").ShouldNot(ContainElement(felixPID))
updateFelixPID()

// Then remove the config that we added.
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
config.Spec.InterfacePrefix = ""
_, err := client.FelixConfigurations().Update(ctx, config, options.SetOptions{})
Expect(err).NotTo(HaveOccurred())
})

It("should exit after a delay", shouldExitAfterADelay)
})
})
})
Expand Down