From d323fedaa8c6f616f01a8ed5e6fcf88cafc1339b Mon Sep 17 00:00:00 2001
From: Greg Eisenhauer
Date: Fri, 13 Jul 2018 22:04:25 -0400
Subject: [PATCH] Placeholder changes for RDMA dp, reader side failure handling

---
 source/adios2/toolkit/sst/dp/rdma_dp.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/source/adios2/toolkit/sst/dp/rdma_dp.c b/source/adios2/toolkit/sst/dp/rdma_dp.c
index 8d5957b2c8..2f2587381f 100644
--- a/source/adios2/toolkit/sst/dp/rdma_dp.c
+++ b/source/adios2/toolkit/sst/dp/rdma_dp.c
@@ -737,9 +737,28 @@ static void *RdmaReadRemoteMemory(CP_Services Svcs, DP_RS_Stream Stream_v,
     return ret;
 }
 
-static void RdmaWaitForCompletion(CP_Services Svcs, void *Handle_v)
+static void RdmaNotifyConnFailure(CP_Services Svcs, DP_RS_Stream Stream_v,
+                                  int FailedPeerRank)
+{
+    Rdma_RS_Stream Stream = (Rdma_RS_Stream)
+        Stream_v; /* DP_RS_Stream is the return from InitReader */
+    CManager cm = Svcs->getCManager(Stream->CP_Stream);
+    Svcs->verbose(Stream->CP_Stream, "received notification that writer peer "
+                                     "%d has failed, failing any pending "
+                                     "requests\n",
+                  FailedPeerRank);
+    // This is what EVPath does...
+    // FailRequestsToRank(Svcs, cm, Stream, FailedPeerRank);
+}
+
+/*
+ * RdmaWaitForCompletion should return 1 if successful, but 0 if the reads
+ * failed for some reason or were aborted by RdmaNotifyConnFailure()
+ */
+static int RdmaWaitForCompletion(CP_Services Svcs, void *Handle_v)
 {
     RdmaCompletionHandle Handle = (RdmaCompletionHandle)Handle_v;
+    int Ret = 1;
     Svcs->verbose(
         Handle->CPStream,
         "Waiting for completion of memory read to rank %d, condition %d\n",
@@ -756,6 +775,7 @@ static void RdmaWaitForCompletion(CP_Services Svcs, void *Handle_v)
         Handle->Rank, Handle->CMcondition);
 
     free(Handle);
+    return Ret;
 }
 
 static void RdmaProvideTimestep(CP_Services Svcs, DP_WS_Stream Stream_v,
@@ -975,6 +995,7 @@ extern CP_DP_Interface LoadRdmaDP()
     RdmaDPInterface.provideWriterDataToReader = RdmaProvideWriterDataToReader;
     RdmaDPInterface.readRemoteMemory = RdmaReadRemoteMemory;
     RdmaDPInterface.waitForCompletion = RdmaWaitForCompletion;
+    RdmaDPInterface.notifyConnFailure = RdmaNotifyConnFailure;
     RdmaDPInterface.provideTimestep = RdmaProvideTimestep;
     RdmaDPInterface.releaseTimestep = RdmaReleaseTimestep;
     RdmaDPInterface.destroyReader = RdmaDestroyReader;