From 748c0226852be7fdc31540b2c4dccb3ee3790f12 Mon Sep 17 00:00:00 2001 From: Tim Smith Date: Tue, 15 Oct 2024 11:53:32 +0100 Subject: [PATCH] CA-400275 Tolerate a restarting fairlock service while connecting Cope with the case where the fairlock service is restarted while something is in the middle of connecting to it. Signed-off-by: Tim Smith --- .gitignore | 1 + misc/fairlock/fairlock.py | 20 ++++++++++++++------ tests/test_fairlock.py | 10 ++++++++-- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 881b5c341..f69b05c9a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ *.o *.swp *.pyc +*.kate-swp tests/faultinjection/pfilter tests/faultinjection/libipq.c tests/faultinjection/libipq/libipq.h diff --git a/misc/fairlock/fairlock.py b/misc/fairlock/fairlock.py index c1563ac6e..91a6a4992 100644 --- a/misc/fairlock/fairlock.py +++ b/misc/fairlock/fairlock.py @@ -48,6 +48,18 @@ def _ensure_service(self): if time.time() > timeout: raise FairlockServiceTimeout(f"Timed out starting service {service}") + def _connect_and_recv(self): + while True: + self.sock.connect(self.sockname) + # Merely being connected is not enough. Read a small blob of data. + b = self.sock.recv(10) + if len(b) > 0: + return True + # If we got a zero-length return, it means the service exited while we + # were waiting. Any timeout we put here would be a max wait time to acquire + # the lock, which is dangerous. + self._ensure_service() + def __enter__(self): if self.connected: raise FairlockDeadlock(f"Deadlock on Fairlock resource '{self.name}'") @@ -55,14 +67,10 @@ def __enter__(self): self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) self.sock.setblocking(True) try: - self.sock.connect(self.sockname) - # Merely being connected is not enough. Read a small blob of data. - self.sock.recv(10) + self._connect_and_recv() except (FileNotFoundError, ConnectionRefusedError): self._ensure_service() - self.sock.connect(self.sockname) - # Merely being connected is not enough. Read a small blob of data. - self.sock.recv(10) + self._connect_and_recv() self.sock.send(f'{os.getpid()} - {time.monotonic()}'.encode()) self.connected = True diff --git a/tests/test_fairlock.py b/tests/test_fairlock.py index d0e1f61b7..3078c0d73 100644 --- a/tests/test_fairlock.py +++ b/tests/test_fairlock.py @@ -23,6 +23,7 @@ def test_first_lock(self): mock_sock = mock.MagicMock() self.mock_socket.socket.return_value = mock_sock mock_sock.connect.side_effect = [FileNotFoundError(), 0] + mock_sock.recv.side_effect = [b'Foop'] self.mock_os.system.side_effect = [0, 1, 0] self.mock_time.time.side_effect = [0, 0, 0] @@ -38,6 +39,7 @@ def test_first_lock_timeout(self): mock_sock = mock.MagicMock() self.mock_socket.socket.return_value = mock_sock mock_sock.connect.side_effect = [FileNotFoundError(), 0] + mock_sock.recv.side_effect = [b'Foop'] self.mock_os.system.side_effect = [0, 1, 1, 1, 0] self.mock_time.time.side_effect = [0, 1, 3] @@ -53,6 +55,7 @@ def test_second_lock(self): mock_sock = mock.MagicMock() self.mock_socket.socket.return_value = mock_sock mock_sock.connect.side_effect = [0] + mock_sock.recv.side_effect = [b'Foop'] with Fairlock("test"): print("Hello World") @@ -68,6 +71,8 @@ def test_two_locks(self): self.mock_socket.socket.side_effect = [mock_sock1, mock_sock2] mock_sock1.connect.side_effect = [FileNotFoundError(), 0] mock_sock2.connect.side_effect = [FileNotFoundError(), 0] + mock_sock1.recv.side_effect = [b'Foop'] + mock_sock2.recv.side_effect = [b'Foop'] self.mock_os.system.side_effect = [0, 1, 0, 0, 1, 0] self.mock_time.time.side_effect = [0, 0, 0, 0, 0, 0] @@ -81,15 +86,16 @@ def test_double_lock_deadlock(self): Test double usage of the same lock """ mock_sock = mock.MagicMock() - self.mock_socket.socket.side_effect = [mock_sock] + self.mock_socket.socket.side_effect = [mock_sock] mock_sock.connect.side_effect = [FileNotFoundError(), 0] + mock_sock.recv.side_effect = [b'Foop', b'Foop'] self.mock_os.system.side_effect = [0, 1, 0, 0, 1, 0] self.mock_time.time.side_effect = [0, 0, 0, 0, 0, 0] with self.assertRaises(FairlockDeadlock) as err: with Fairlock("test") as l: n = Fairlock("test") - self.assertEquals(l, n) + self.assertEqual(l, n) # Real code would use another 'with Fairlock("test")' here but we cannot # do that because it insists on having a code block as a body, which would # then not be reached, causing a "Test code not fully covered" failure