From 9fd76152c3b4b654cc13900669e2ccde55e3d3e9 Mon Sep 17 00:00:00 2001
From: Dongsu Park <dongsu@endocode.com>
Date: Thu, 21 Apr 2016 13:42:16 +0200
Subject: [PATCH] functional: introduce a new test TestDetectMachineId

A new test TestDetectMachineId checks whether etcd registration fails
when the same /etc/machine-id is used by two different machines. Note
that the registration is expected to fail in this case.

The goal of the test is to cover the improvement patch by @wuqixuan
("fleetd: Detecting the existing machine-id").

See also https://github.com/coreos/fleet/pull/1288,
https://github.com/coreos/fleet/issues/1241,
https://github.com/coreos/fleet/issues/615.

Suggested-by: Olaf Buddenhagen <olaf@endocode.com>
Cc: wuqixuan <wuqixuan@huawei.com>
Cc: Djalal Harouni <djalal@endocode.com>
---
 functional/node_test.go | 94 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/functional/node_test.go b/functional/node_test.go
index 1f11abbd6..663442396 100644
--- a/functional/node_test.go
+++ b/functional/node_test.go
@@ -82,3 +82,97 @@ func TestNodeShutdown(t *testing.T) {
 		t.Fatalf("Unit hello.service not reported as inactive:\n%s\n", stdout)
 	}
 }
+
+// TestDetectMachineId checks that etcd registration fails when a machine-id
+// is duplicated on different machines.
+// First it creates a cluster with 2 members, m0 and m1, then sets m1's
+// machine ID to m0's and restarts fleet on m1. The test succeeds when
+// fleetctl on m1 reports the expected error, and when m1 is registered
+// under m0's former ID after m0 has been destroyed.
+func TestDetectMachineId(t *testing.T) {
+	cluster, err := platform.NewNspawnCluster("smoke")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cluster.Destroy()
+
+	members, err := platform.CreateNClusterMembers(cluster, 2)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	m0 := members[0]
+	m1 := members[1]
+	_, err = cluster.WaitForNMachines(m0, 2)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	machineIDFile := "/etc/machine-id"
+
+	// Restart fleet service, and check if its systemd status is still active.
+	restartFleetService := func(m platform.Member) error {
+		stdout, err := cluster.MemberCommand(m, "sudo", "systemctl", "restart", "fleet.service")
+		if err != nil {
+			return fmt.Errorf("Failed to restart fleet service\nstdout: %s\nerr: %v", stdout, err)
+		}
+
+		stdout, err = cluster.MemberCommand(m, "systemctl", "show", "--property=ActiveState", "fleet")
+		if err != nil || strings.TrimSpace(stdout) != "ActiveState=active" {
+			return fmt.Errorf("Fleet unit not reported as active: %s (err: %v)", stdout, err)
+		}
+		stdout, err = cluster.MemberCommand(m, "systemctl", "show", "--property=Result", "fleet")
+		if err != nil || strings.TrimSpace(stdout) != "Result=success" {
+			return fmt.Errorf("Result for fleet unit not reported as success: %s (err: %v)", stdout, err)
+		}
+		return nil
+	}
+
+	stdout, err := cluster.MemberCommand(m0, "cat", machineIDFile)
+	if err != nil {
+		t.Fatalf("Failed to get machine-id\nstdout: %s\nerr: %v", stdout, err)
+	}
+	m0MachineID := strings.TrimSpace(stdout)
+
+	// Unconditionally overwrite m1's machine ID with m0's, to intentionally
+	// trigger the error case of a duplicated machine ID.
+	stdout, err = cluster.MemberCommand(m1,
+		"echo", m0MachineID, "|", "sudo", "tee", machineIDFile)
+	if err != nil {
+		t.Fatalf("Failed to replace machine-id\nstdout: %s\nerr: %v", stdout, err)
+	}
+
+	if err := restartFleetService(m1); err != nil {
+		t.Fatal(err)
+	}
+
+	// fleetd should actually be running, but failing to list machines.
+	// So we should expect a specific error after running fleetctl list-machines,
+	// like "googleapi: Error 503: fleet server unable to communicate with etcd".
+	_, stderr, err := cluster.Fleetctl(m1, "list-machines", "--no-legend")
+	if err == nil {
+		t.Fatalf("m1: should get an error, but got success.\nstderr: %s", stderr)
+	}
+	// Only "exit status 1" together with "...unable to communicate..." is the
+	// expected error; any other failure is a genuine test failure.
+	if !strings.Contains(err.Error(), "exit status 1") ||
+		!strings.Contains(stderr, "fleet server unable to communicate with etcd") {
+		t.Fatalf("m1: Failed to get list of machines. err: %v\nstderr: %s", err, stderr)
+	}
+
+	// Destroy m0 and let m1 grab its ID.
+	if err := cluster.DestroyMember(m0); err != nil {
+		t.Fatal(err)
+	}
+
+	// Wait again for m1 to register itself.
+	machines, err := cluster.WaitForNMachines(m1, 1)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// m1 should now be registered under the ID previously held by m0.
+	if machines[0] != m0MachineID {
+		t.Fatalf("Error: m1 failed to register its self with the previous ID of m0: %s", m0MachineID)
+	}
+}