diff --git a/functional/node_test.go b/functional/node_test.go index 1f11abbd6..663442396 100644 --- a/functional/node_test.go +++ b/functional/node_test.go @@ -82,3 +82,97 @@ func TestNodeShutdown(t *testing.T) { t.Fatalf("Unit hello.service not reported as inactive:\n%s\n", stdout) } } + +// TestDetectMachineId checks for etcd registration failing on a duplicated +// machine-id on different machines. +// First it creates a cluster with 2 members, m0 and m1. Then make their +// machine IDs the same as each other, by explicitly setting the m1's ID to +// the same as m0's. Test succeeds when an error returns, while test fails +// when nothing happens. +func TestDetectMachineId(t *testing.T) { + cluster, err := platform.NewNspawnCluster("smoke") + if err != nil { + t.Fatal(err) + } + defer cluster.Destroy() + + members, err := platform.CreateNClusterMembers(cluster, 2) + if err != nil { + t.Fatal(err) + } + + m0 := members[0] + m1 := members[1] + _, err = cluster.WaitForNMachines(m0, 2) + if err != nil { + t.Fatal(err) + } + + machineIdFile := "/etc/machine-id" + + // Restart fleet service, and check if its systemd status is still active. + restartFleetService := func(m platform.Member) error { + stdout, err := cluster.MemberCommand(m, "sudo", "systemctl", "restart", "fleet.service") + if err != nil { + return fmt.Errorf("Failed to restart fleet service\nstdout: %s\nerr: %v", stdout, err) + } + + stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=ActiveState", "fleet") + if strings.TrimSpace(stdout) != "ActiveState=active" { + return fmt.Errorf("Fleet unit not reported as active: %s", stdout) + } + stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=Result", "fleet") + if strings.TrimSpace(stdout) != "Result=success" { + return fmt.Errorf("Result for fleet unit not reported as success: %s", stdout) + } + return nil + } + + stdout, err := cluster.MemberCommand(m0, "cat", machineIdFile) + if err != nil { + t.Fatalf("Failed to get machine-id\nstdout: %s\nerr: %v", stdout, err) + } + m0_machine_id := strings.TrimSpace(stdout) + + // If the two machine IDs are different with each other, + // set the m1's ID to the same one as m0, to intentionally + // trigger an error case of duplication of machine ID. + stdout, err = cluster.MemberCommand(m1, + "echo", m0_machine_id, "|", "sudo", "tee", machineIdFile) + if err != nil { + t.Fatalf("Failed to replace machine-id\nstdout: %s\nerr: %v", stdout, err) + } + + if err := restartFleetService(m1); err != nil { + t.Fatal(err) + } + + // fleetd should actually be running, but failing to list machines. + // So we should expect a specific error after running fleetctl list-machines, + // like "googlapi: Error 503: fleet server unable to communicate with etcd". + stdout, stderr, err := cluster.Fleetctl(m1, "list-machines", "--no-legend") + if err != nil { + if !strings.Contains(err.Error(), "exit status 1") || + !strings.Contains(stderr, "fleet server unable to communicate with etcd") { + t.Fatalf("m1: Failed to get list of machines. err: %v\nstderr: %s", err, stderr) + } + // If both conditions are satisfied, "exit status 1" and + // "...unable to communicate...", then it's an expected error. PASS. + } else { + t.Fatalf("m1: should get an error, but got success.\nstderr: %s", stderr) + } + + // destroy m0 and let m1 grab its ID + cluster.DestroyMember(m0) + + // Wait again for m1 to register its self. + machines, err := cluster.WaitForNMachines(m1, 1) + if err != nil { + t.Fatal(err) + } + + // m1 should have get the first ID now + if machines[0] != m0_machine_id { + t.Fatalf("Error: m1 failed to register its self with the previous ID of m0: %s", m0_machine_id) + } +}