From 6c704bcd89b4f7c8feabb9830dd1f0b217ad29fc Mon Sep 17 00:00:00 2001 From: Niraj Yadav Date: Mon, 2 Sep 2024 20:22:03 +0530 Subject: [PATCH] cephfs: Fix Removal of IPs from blocklist While dealing with CephFS fencing we evict the clients and block the IPs from the CIDR range that do not have any active clients individually. While Unfencing, the IP is removed via the CIDR range which fails to remove the individual IPs from Ceph's blacklist. This PR fetches the blocklist from ceph and removes the IPs in blocklist that lie inside the CIDR range along with their unique nonces. Signed-off-by: Niraj Yadav --- internal/csi-addons/cephfs/network_fence.go | 2 +- internal/csi-addons/networkfence/fencing.go | 161 +++++++++++++++--- .../csi-addons/networkfence/fencing_test.go | 119 +++++++++++++ 3 files changed, 253 insertions(+), 29 deletions(-) diff --git a/internal/csi-addons/cephfs/network_fence.go b/internal/csi-addons/cephfs/network_fence.go index ecef1a4e0e7..f480939ede0 100644 --- a/internal/csi-addons/cephfs/network_fence.go +++ b/internal/csi-addons/cephfs/network_fence.go @@ -111,7 +111,7 @@ func (fcs *FenceControllerServer) UnfenceClusterNetwork( return nil, status.Error(codes.Internal, err.Error()) } - err = nwFence.RemoveNetworkFence(ctx) + err = nwFence.RemoveClientEviction(ctx) if err != nil { return nil, status.Errorf(codes.Internal, "failed to unfence CIDR block %q: %s", nwFence.Cidr, err.Error()) } diff --git a/internal/csi-addons/networkfence/fencing.go b/internal/csi-addons/networkfence/fencing.go index d49c621e3ed..1f1eff72ac8 100644 --- a/internal/csi-addons/networkfence/fencing.go +++ b/internal/csi-addons/networkfence/fencing.go @@ -52,6 +52,13 @@ type activeClient struct { Inst string `json:"inst"` } +// IPWithNonce represents the structure of an IP with nonce +// as listed by Ceph OSD blocklist. +type IPWithNonce struct { + IP string `json:"ip"` + Nonce string `json:"nonce"` +} + // NewNetworkFence returns a networkFence struct object from the Network fence/unfence request. func NewNetworkFence( ctx context.Context, @@ -256,9 +263,7 @@ func (ac *activeClient) fetchID() (int, error) { } // AddClientEviction blocks access for all the IPs in the CIDR block -// using client eviction. -// blocks the active clients listed in cidr, and the IPs -// for whom there is no active client present too. +// using client eviction, it also blocks the entire CIDR. func (nf *NetworkFence) AddClientEviction(ctx context.Context) error { evictedIPs := make(map[string]bool) // fetch active clients @@ -269,13 +274,15 @@ func (nf *NetworkFence) AddClientEviction(ctx context.Context) error { // iterate through CIDR blocks and check if any active client matches for _, cidr := range nf.Cidr { for _, client := range activeClients { - clientIP, err := client.fetchIP() + var clientIP string + clientIP, err = client.fetchIP() if err != nil { return fmt.Errorf("error fetching client IP: %w", err) } // check if the clientIP is in the CIDR block if isIPInCIDR(ctx, clientIP, cidr) { - clientID, err := client.fetchID() + var clientID int + clientID, err = client.fetchID() if err != nil { return fmt.Errorf("error fetching client ID: %w", err) } @@ -291,25 +298,10 @@ func (nf *NetworkFence) AddClientEviction(ctx context.Context) error { } } - // blocklist the IPs in CIDR without any active clients - for _, cidr := range nf.Cidr { - // check if the CIDR is evicted - // fetch the list of IPs from a CIDR block - hosts, err := getIPRange(cidr) - if err != nil { - return fmt.Errorf("failed to convert CIDR block %s to corresponding IP range: %w", cidr, err) - } - - // add ceph blocklist for each IP in the range mentioned by the CIDR - for _, host := range hosts { - if evictedIPs[host] { - continue - } - err = nf.addCephBlocklist(ctx, host, false) - if err != nil { - return err - } - } + // add the range based blocklist for CIDR + err = nf.AddNetworkFence(ctx) + if err != nil { + return err } return nil @@ -358,7 +350,8 @@ func GetCIDR(cidrs Cidrs) ([]string, error) { } // removeCephBlocklist removes an IP from ceph osd blocklist. -func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip string, useRange bool) error { +// the value of nonce is ignored if useRange is true. +func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip, nonce string, useRange bool) error { arg := []string{ "--id", nf.cr.ID, "--keyfile=" + nf.cr.KeyFile, @@ -368,7 +361,15 @@ func (nf *NetworkFence) removeCephBlocklist(ctx context.Context, ip string, useR if useRange { cmd = append(cmd, "range") } - cmd = append(cmd, "rm", ip) + + // If nonce is not empty and we are not using + // range based blocks, we need to add the nonce + if nonce != "" && !useRange { + cmd = append(cmd, "rm", fmt.Sprintf("%s:0/%s", ip, nonce)) + } else { + cmd = append(cmd, "rm", ip) + } + cmd = append(cmd, arg...) _, stdErr, err := util.ExecCommand(ctx, "ceph", cmd...) @@ -396,7 +397,7 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error { // try range blocklist cmd, if invalid fallback to // iterating through IP range. if hasBlocklistRangeSupport { - err := nf.removeCephBlocklist(ctx, cidr, true) + err := nf.removeCephBlocklist(ctx, cidr, "", true) if err == nil { continue } @@ -412,7 +413,41 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error { } // remove ceph blocklist for each IP in the range mentioned by the CIDR for _, host := range hosts { - err := nf.removeCephBlocklist(ctx, host, false) + // 0 is used as nonce here to tell ceph + // to remove the blocklist entry matching: :0/0 + // it is same as telling ceph to remove just the IP + // without specifying any port or nonce with it. + err := nf.removeCephBlocklist(ctx, host, "0", false) + if err != nil { + return err + } + } + } + + return nil +} + +func (nf *NetworkFence) RemoveClientEviction(ctx context.Context) error { + // Remove the CIDR block first + err := nf.RemoveNetworkFence(ctx) + if err != nil { + return err + } + + // Get the ceph blocklist + blocklist, err := nf.getCephBlocklist(ctx) + if err != nil { + return err + } + + // For each CIDR block, remove the IPs in the blocklist + // that fall under the CIDR with nonce + for _, cidr := range nf.Cidr { + hosts := nf.parseBlocklistForCIDR(ctx, blocklist, cidr) + log.DebugLog(ctx, "parsed blocklist for CIDR %s: %+v", cidr, hosts) + + for _, host := range hosts { + err := nf.removeCephBlocklist(ctx, host.IP, host.Nonce, false) if err != nil { return err } @@ -421,3 +456,73 @@ func (nf *NetworkFence) RemoveNetworkFence(ctx context.Context) error { return nil } + +// getCephBlocklist fetches the ceph blocklist and returns it as a string. +func (nf *NetworkFence) getCephBlocklist(ctx context.Context) (string, error) { + arg := []string{ + "--id", nf.cr.ID, + "--keyfile=" + nf.cr.KeyFile, + "-m", nf.Monitors, + } + // FIXME: replace the ceph command with go-ceph API in future + cmd := []string{"osd", "blocklist", "ls"} + cmd = append(cmd, arg...) + stdout, stdErr, err := util.ExecCommandWithTimeout(ctx, 2*time.Minute, "ceph", cmd...) + if err != nil { + return "", fmt.Errorf("failed to get the ceph blocklist: %w, stderr: %q", err, stdErr) + } + + return stdout, nil +} + +// parseBlocklistEntry parses a single entry from the ceph blocklist +// and returns the IPWithNonce. +func (nf *NetworkFence) parseBlocklistEntry(entry string) IPWithNonce { + parts := strings.Fields(entry) + if len(parts) == 0 { + return IPWithNonce{} + } + + ipPortNonce := strings.SplitN(parts[0], "/", 2) + if len(ipPortNonce) != 2 { + return IPWithNonce{} + } + + ipPort := ipPortNonce[0] + nonce := ipPortNonce[1] + + lastColonIndex := strings.LastIndex(ipPortNonce[0], ":") + if lastColonIndex == -1 { + return IPWithNonce{} + } + + if len(ipPort) <= lastColonIndex { + return IPWithNonce{} + } + ip := ipPort[:lastColonIndex] + + return IPWithNonce{IP: ip, Nonce: nonce} +} + +// parseBlocklistForCIDR scans the blocklist for the given CIDR and returns +// the list of IPs that lie within the CIDR along with their nonce. +func (nf *NetworkFence) parseBlocklistForCIDR(ctx context.Context, blocklist, cidr string) []IPWithNonce { + blocklistEntries := strings.Split(blocklist, "\n") + + matchingHosts := make([]IPWithNonce, 0) + for _, entry := range blocklistEntries { + entry = strings.TrimSpace(entry) + + // Skip unrelated ranged blocks and invalid entries + if strings.Contains(entry, "cidr") || !strings.Contains(entry, "/") { + continue + } + + blockedHost := nf.parseBlocklistEntry(entry) + if isIPInCIDR(ctx, blockedHost.IP, cidr) { + matchingHosts = append(matchingHosts, blockedHost) + } + } + + return matchingHosts +} diff --git a/internal/csi-addons/networkfence/fencing_test.go b/internal/csi-addons/networkfence/fencing_test.go index f7bfa797a05..4bd7636acf3 100644 --- a/internal/csi-addons/networkfence/fencing_test.go +++ b/internal/csi-addons/networkfence/fencing_test.go @@ -17,6 +17,7 @@ limitations under the License. package networkfence import ( + "context" "testing" "github.com/stretchr/testify/require" @@ -138,3 +139,121 @@ func TestFetchID(t *testing.T) { }) } } + +func TestParseBlocklistEntry(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + expected IPWithNonce + }{ + { + name: "Valid IP and nonce", + input: "192.168.1.1:6789/abcdef123456", + expected: IPWithNonce{IP: "192.168.1.1", Nonce: "abcdef123456"}, + }, + { + name: "IPv6 address with full notation", + input: "2001:0db8:0000:0000:0000:8a2e:0370:7334:6789/abc123", + expected: IPWithNonce{IP: "2001:0db8:0000:0000:0000:8a2e:0370:7334", Nonce: "abc123"}, + }, + { + name: "IPv6 address with compressed zeros", + input: "2001:db8::1428:57ab:6789/def456", + expected: IPWithNonce{IP: "2001:db8::1428:57ab", Nonce: "def456"}, + }, + { + name: "IPv6 loopback address", + input: "::1:6789/ghi789", + expected: IPWithNonce{IP: "::1", Nonce: "ghi789"}, + }, + { + name: "IPv6 address with IPv4 mapping", + input: "::ffff:192.0.2.128:6789/jkl012", + expected: IPWithNonce{IP: "::ffff:192.0.2.128", Nonce: "jkl012"}, + }, + { + name: "IP without port", + input: "10.0.0.1/nonce123", + expected: IPWithNonce{}, + }, + { + name: "Extra whitespace", + input: " 172.16.0.1:1234/abc123 extra info ", + expected: IPWithNonce{IP: "172.16.0.1", Nonce: "abc123"}, + }, + } + + nf := &NetworkFence{} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + result := nf.parseBlocklistEntry(tt.input) + require.Equal(t, tt.expected, result) + }) + } +} + +func TestParseBlocklistForCIDR(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + blocklist string + cidr string + expected []IPWithNonce + }{ + { + name: "Single IPv4 in CIDR", + blocklist: `192.168.1.1:0/1234567 expires 2023-07-01 10:00:00.000000 +listed 1 entries`, + cidr: "192.168.1.0/24", + expected: []IPWithNonce{{IP: "192.168.1.1", Nonce: "1234567"}}, + }, + { + name: "Multiple IPv4 in CIDR", + blocklist: `192.168.1.1:0/1234567 expires 2023-07-01 10:00:00.000000 +192.168.1.2:0/7654321 expires 2023-07-01 11:00:00.000000 +192.168.2.1:0/abcdefg expires 2023-07-01 12:00:00.000000 +listed 3 entries`, + cidr: "192.168.1.0/24", + expected: []IPWithNonce{ + {IP: "192.168.1.1", Nonce: "1234567"}, + {IP: "192.168.1.2", Nonce: "7654321"}, + }, + }, + { + name: "IPv6 in CIDR", + blocklist: `2001:db8::1:0/fedcba expires 2023-07-01 10:00:00.000000 +2001:db8::2:0/abcdef expires 2023-07-01 11:00:00.000000 +listed 2 entries`, + cidr: "2001:db8::/64", + expected: []IPWithNonce{{IP: "2001:db8::1", Nonce: "fedcba"}, {IP: "2001:db8::2", Nonce: "abcdef"}}, + }, + { + name: "Empty blocklist", + blocklist: `listed 0 entries`, + cidr: "192.168.1.0/24", + expected: []IPWithNonce{}, + }, + { + name: "No matching IPs", + blocklist: `10.0.0.1:0/1234567 expires 2023-07-01 10:00:00.000000 +listed 1 entries`, + cidr: "192.168.1.0/24", + expected: []IPWithNonce{}, + }, + } + + nf := &NetworkFence{} + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + result := nf.parseBlocklistForCIDR(context.TODO(), tc.blocklist, tc.cidr) + require.Equal(t, tc.expected, result) + }) + } +}