From e876ca96b2019d291b720ffe4ca86a67d38976e1 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 12 Oct 2023 16:42:49 +0800 Subject: [PATCH 1/3] keep socket alive in test --- tests/cpp/collective/test_comm.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/cpp/collective/test_comm.cc b/tests/cpp/collective/test_comm.cc index 7792c4c25059..57be7bb0a7b4 100644 --- a/tests/cpp/collective/test_comm.cc +++ b/tests/cpp/collective/test_comm.cc @@ -18,9 +18,12 @@ TEST_F(CommTest, Channel) { std::vector<std::thread> workers; std::int32_t port = tracker.Port(); + std::vector<std::shared_ptr<WorkerForTest>> refs(n_workers); + for (std::int32_t i = 0; i < n_workers; ++i) { - workers.emplace_back([=] { - WorkerForTest worker{host, port, timeout, n_workers, i}; + workers.emplace_back([=, &refs] { + refs[i] = std::make_shared<WorkerForTest>(host, port, timeout, n_workers, i); + auto &worker = *refs[i]; if (i % 2 == 0) { auto p_chan = worker.Comm().Chan(i + 1); p_chan->SendAll( @@ -41,6 +44,7 @@ TEST_F(CommTest, Channel) { for (auto &w : workers) { w.join(); } + refs = decltype(refs){}; ASSERT_TRUE(fut.get().OK()); } From eeffc0b2131ce1db9d19bd8b310125c49a777e55 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 12 Oct 2023 16:47:11 +0800 Subject: [PATCH 2/3] let it hung --- rabit/include/rabit/internal/socket.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/rabit/include/rabit/internal/socket.h b/rabit/include/rabit/internal/socket.h index f1a6699fbf0c..89e3244822df 100644 --- a/rabit/include/rabit/internal/socket.h +++ b/rabit/include/rabit/internal/socket.h @@ -100,9 +100,6 @@ std::enable_if_t<std::is_integral_v<E>, xgboost::collective::Result> PollError(E if ((revents & POLLNVAL) != 0) { return xgboost::system::FailWithCode("Invalid polling request."); } - if ((revents & POLLHUP) != 0) { - return xgboost::system::FailWithCode("Poll hung up."); - } return xgboost::collective::Success(); } From bc1159bc8bb5268b8d86534eeca8a07e841d6415 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 12 Oct 2023 16:48:19
+0800 Subject: [PATCH 3/3] revert --- tests/cpp/collective/test_comm.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/cpp/collective/test_comm.cc b/tests/cpp/collective/test_comm.cc index 57be7bb0a7b4..7792c4c25059 100644 --- a/tests/cpp/collective/test_comm.cc +++ b/tests/cpp/collective/test_comm.cc @@ -18,12 +18,9 @@ TEST_F(CommTest, Channel) { std::vector<std::thread> workers; std::int32_t port = tracker.Port(); - std::vector<std::shared_ptr<WorkerForTest>> refs(n_workers); - for (std::int32_t i = 0; i < n_workers; ++i) { - workers.emplace_back([=, &refs] { - refs[i] = std::make_shared<WorkerForTest>(host, port, timeout, n_workers, i); - auto &worker = *refs[i]; + workers.emplace_back([=] { + WorkerForTest worker{host, port, timeout, n_workers, i}; if (i % 2 == 0) { auto p_chan = worker.Comm().Chan(i + 1); p_chan->SendAll( @@ -44,7 +41,6 @@ TEST_F(CommTest, Channel) { for (auto &w : workers) { w.join(); } - refs = decltype(refs){}; ASSERT_TRUE(fut.get().OK()); }