From 873666bd072d07a3ee77d047578d4563f47fdbd9 Mon Sep 17 00:00:00 2001 From: Norbert Podhorszki Date: Wed, 13 Feb 2019 05:43:10 -0500 Subject: [PATCH 01/16] Support option 'accuracy' in MGARD, as a synonym for tolerance --- .../operator/compress/CompressMGARD.cpp | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/source/adios2/operator/compress/CompressMGARD.cpp b/source/adios2/operator/compress/CompressMGARD.cpp index 9da1bb33ac..bff39ca90d 100644 --- a/source/adios2/operator/compress/CompressMGARD.cpp +++ b/source/adios2/operator/compress/CompressMGARD.cpp @@ -72,18 +72,26 @@ size_t CompressMGARD::Compress(const void *dataIn, const Dims &dimensions, } // Parameters + bool hasTolerance = false; + double tolerance; + auto itAccuracy = parameters.find("accuracy"); + if (itAccuracy != parameters.end()) + { + tolerance = std::stod(itAccuracy->second); + hasTolerance = true; + } auto itTolerance = parameters.find("tolerance"); - if (m_DebugMode) + if (itTolerance != parameters.end()) { - if (itTolerance == parameters.end()) - { - throw std::invalid_argument("ERROR: missing mandatory parameter " - "tolerance for MGARD compression " - "operator, in call to Put\n"); - } + tolerance = std::stod(itTolerance->second); + hasTolerance = true; + } + if (!hasTolerance) + { + throw std::invalid_argument("ERROR: missing mandatory parameter " + "tolerance for MGARD compression " + "operator\n"); } - - double tolerance = std::stod(itTolerance->second); int sizeOut = 0; unsigned char *dataOutPtr = From 75f08baf9ad77492b09422c81490ae6b0ecfd701 Mon Sep 17 00:00:00 2001 From: Norbert Podhorszki Date: Fri, 15 Feb 2019 16:45:16 -0500 Subject: [PATCH 02/16] Sort variable and attributes names to return the same list as the C++/Python API in adios2_inquire_all_variables() and adios2_inquire_all_attributes() --- bindings/C/c/adios2_c_io.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/bindings/C/c/adios2_c_io.cpp 
b/bindings/C/c/adios2_c_io.cpp index 3e7e3937fe..857a2f3271 100644 --- a/bindings/C/c/adios2_c_io.cpp +++ b/bindings/C/c/adios2_c_io.cpp @@ -364,10 +364,16 @@ adios2_error adios2_inquire_all_variables(adios2_variable ***variables, *size = dataMap.size(); adios2_variable **list = (adios2_variable **)calloc(*size, sizeof(adios2_variable *)); + + // Sort the names so that we return the same order as the + // C++, python APIs + std::set names; + for (auto& it : dataMap) names.insert(it.first); + size_t n = 0; - for (auto it = dataMap.begin(); it != dataMap.end(); ++it) + for (auto& name : names) { - const std::string name(it->first); + auto it = dataMap.find(name); const std::string type(it->second.first); adios2::core::VariableBase *variable = nullptr; @@ -887,10 +893,16 @@ adios2_error adios2_inquire_all_attributes(adios2_attribute ***attributes, *size = dataMap.size(); adios2_attribute **list = (adios2_attribute **)calloc(*size, sizeof(adios2_attribute *)); + + // Sort the names so that we return the same order as the + // C++, python APIs + std::set names; + for (auto& it : dataMap) names.insert(it.first); + size_t n = 0; - for (auto it = dataMap.begin(); it != dataMap.end(); ++it) + for (auto& name : names) { - const std::string name(it->first); + auto it = dataMap.find(name); const std::string type(it->second.first); adios2::core::AttributeBase *attribute = nullptr; From 8c0f8eae496bb3b33a6f0c69e3ffdf9e7d5f4524 Mon Sep 17 00:00:00 2001 From: Norbert Podhorszki Date: Wed, 13 Feb 2019 07:48:34 -0500 Subject: [PATCH 03/16] clang-format --- bindings/C/c/adios2_c_io.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bindings/C/c/adios2_c_io.cpp b/bindings/C/c/adios2_c_io.cpp index 857a2f3271..44bb89af8d 100644 --- a/bindings/C/c/adios2_c_io.cpp +++ b/bindings/C/c/adios2_c_io.cpp @@ -365,13 +365,14 @@ adios2_error adios2_inquire_all_variables(adios2_variable ***variables, adios2_variable **list = (adios2_variable **)calloc(*size, 
sizeof(adios2_variable *)); - // Sort the names so that we return the same order as the + // Sort the names so that we return the same order as the // C++, python APIs std::set names; - for (auto& it : dataMap) names.insert(it.first); + for (auto &it : dataMap) + names.insert(it.first); size_t n = 0; - for (auto& name : names) + for (auto &name : names) { auto it = dataMap.find(name); const std::string type(it->second.first); @@ -894,13 +895,14 @@ adios2_error adios2_inquire_all_attributes(adios2_attribute ***attributes, adios2_attribute **list = (adios2_attribute **)calloc(*size, sizeof(adios2_attribute *)); - // Sort the names so that we return the same order as the + // Sort the names so that we return the same order as the // C++, python APIs std::set names; - for (auto& it : dataMap) names.insert(it.first); + for (auto &it : dataMap) + names.insert(it.first); size_t n = 0; - for (auto& name : names) + for (auto &name : names) { auto it = dataMap.find(name); const std::string type(it->second.first); From be48fd85e6ea93105adaed989af6e63ec7b04890 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Thu, 14 Feb 2019 16:31:52 -0500 Subject: [PATCH 04/16] Restore SST no-mpi testing by letting staging-common activate on a no-mpi build --- testing/adios2/engine/CMakeLists.txt | 2 +- .../engine/staging-common/CMakeLists.txt | 33 +++++++++---------- .../engine/staging-common/run_multi_test.in | 3 ++ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/testing/adios2/engine/CMakeLists.txt b/testing/adios2/engine/CMakeLists.txt index 3712c759a4..1d114df4b9 100644 --- a/testing/adios2/engine/CMakeLists.txt +++ b/testing/adios2/engine/CMakeLists.txt @@ -21,6 +21,6 @@ endif() if(ADIOS2_HAVE_MPI) add_subdirectory(common) add_subdirectory(insitumpi) - add_subdirectory(staging-common) endif() +add_subdirectory(staging-common) diff --git a/testing/adios2/engine/staging-common/CMakeLists.txt b/testing/adios2/engine/staging-common/CMakeLists.txt index 
6865af918b..46422e70d9 100644 --- a/testing/adios2/engine/staging-common/CMakeLists.txt +++ b/testing/adios2/engine/staging-common/CMakeLists.txt @@ -5,27 +5,24 @@ find_package(Threads REQUIRED) -set(extra_test_args EXEC_WRAPPER ${MPIEXEC_COMMAND}) - -add_executable(TestStagingMPMD TestStagingMPMD.cpp) -target_link_libraries(TestStagingMPMD adios2 gtest ${CMAKE_THREAD_LIBS_INIT}) if(ADIOS2_HAVE_MPI) - target_link_libraries(TestStagingMPMD adios2 gtest_interface MPI::MPI_C) -endif() + set(extra_test_args EXEC_WRAPPER ${MPIEXEC_COMMAND}) -if(ADIOS2_HAVE_SST) -gtest_add_tests(TARGET TestStagingMPMD ${extra_test_args} - EXTRA_ARGS "SST" "MarshalMethod:FFS" - TEST_SUFFIX _SST_FFS) -gtest_add_tests(TARGET TestStagingMPMD ${extra_test_args} - EXTRA_ARGS "SST" "MarshalMethod:BP" - TEST_SUFFIX _SST_BP) -endif() + add_executable(TestStagingMPMD TestStagingMPMD.cpp) + target_link_libraries(TestStagingMPMD adios2 gtest ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(TestStagingMPMD adios2 gtest_interface MPI::MPI_C) + if(ADIOS2_HAVE_SST) + gtest_add_tests(TARGET TestStagingMPMD ${extra_test_args} + EXTRA_ARGS "SST" "MarshalMethod:FFS" + TEST_SUFFIX _SST_FFS) + gtest_add_tests(TARGET TestStagingMPMD ${extra_test_args} + EXTRA_ARGS "SST" "MarshalMethod:BP" + TEST_SUFFIX _SST_BP) + endif() -if(ADIOS2_HAVE_MPI) -gtest_add_tests(TARGET TestStagingMPMD ${extra_test_args} - EXTRA_ARGS "InSituMPI" - TEST_SUFFIX _InSituMPI) + gtest_add_tests(TARGET TestStagingMPMD ${extra_test_args} + EXTRA_ARGS "InSituMPI" + TEST_SUFFIX _InSituMPI) endif() add_executable(TestCommonWrite TestCommonWrite.cpp) diff --git a/testing/adios2/engine/staging-common/run_multi_test.in b/testing/adios2/engine/staging-common/run_multi_test.in index 87bd4a5724..e283107cd5 100755 --- a/testing/adios2/engine/staging-common/run_multi_test.in +++ b/testing/adios2/engine/staging-common/run_multi_test.in @@ -61,6 +61,7 @@ unless($writer_pid) my $writer_arg_str = ""; $writer_arg_str = join(" ", @writer_args) if 
(@writer_args); my $command = "$writer_exec_cmd $my_dirname/$writer_prog $engine $filename " . $writer_arg_str; + $command =~ s/^\s+//; print "TestDriver: EXECwriter $command writer PID $$\n" if $verbose; my $result = system (split / /,$command); print "TestDriver: System for writer returns ", $result, " \n"; @@ -87,6 +88,7 @@ if ($test_protocol eq "kill_readers") { #Child process is here print "TestDriver: System( $my_dirname/$reader_prog)\n" if $verbose; my $command = "$reader_exec_cmd $my_dirname/$reader_prog $engine $filename " . join(" ", @reader_args); + $command =~ s/^\s+//; my $result = system (split / /,$command); print "TestDriver: System for kill_readers Reader returns ", $result, " \n"; exit $result; @@ -142,6 +144,7 @@ if ($test_protocol eq "one_to_one") { { #Child process is here my $command = "$reader_exec_cmd $my_dirname/$reader_prog $engine $filename " . join(" ", @reader_args); + $command =~ s/^\s+//; print "TestDriver: EXECreader $command\n" if $verbose; my $result = system (split / /,$command); print "TestDriver: System() for one_to_one Reader returned $result\n" if $verbose; From 2f5476fd164798982756e7a445e12e9d4c0e8565 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Thu, 14 Feb 2019 19:51:05 -0500 Subject: [PATCH 05/16] Remove KillReader tests until stabilized. Try to fix VC++ compilation. 
--- .../engine/staging-common/CMakeLists.txt | 3 ++- .../staging-common/TestCommonClient.cpp | 2 +- .../adios2/engine/staging-common/TestData.h | 20 +++++++++---------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/testing/adios2/engine/staging-common/CMakeLists.txt b/testing/adios2/engine/staging-common/CMakeLists.txt index 46422e70d9..7463121616 100644 --- a/testing/adios2/engine/staging-common/CMakeLists.txt +++ b/testing/adios2/engine/staging-common/CMakeLists.txt @@ -93,7 +93,8 @@ set (FORTRAN_TESTS "") if(ADIOS2_HAVE_Fortran) set (FORTRAN_TESTS "FtoC.1x1;CtoF.1x1.FFS;CtoF.1x1.BP;FtoF.1x1") endif() -set (SPECIAL_TESTS "KillReadersSerialized;KillReaders3Max;TimeoutReader;LatestReader.FFS;LatestReader.BP;DiscardWriter") +#set (SPECIAL_TESTS "KillReadersSerialized;KillReaders3Max;TimeoutReader;LatestReader.FFS;LatestReader.BP;DiscardWriter") +set (SPECIAL_TESTS "TimeoutReader;LatestReader.FFS;LatestReader.BP;DiscardWriter") set (MPI_TESTS "") set (MPI_FORTRAN_TESTS "") diff --git a/testing/adios2/engine/staging-common/TestCommonClient.cpp b/testing/adios2/engine/staging-common/TestCommonClient.cpp index 351ef2377d..09ee13fc86 100644 --- a/testing/adios2/engine/staging-common/TestCommonClient.cpp +++ b/testing/adios2/engine/staging-common/TestCommonClient.cpp @@ -150,7 +150,7 @@ TEST_F(SstReadTest, ADIOS2SstRead) { SkippedSteps++; } - t = currentStep; // starting out + t = (unsigned int)currentStep; // starting out } EXPECT_EQ(currentStep, static_cast(t)); diff --git a/testing/adios2/engine/staging-common/TestData.h b/testing/adios2/engine/staging-common/TestData.h index 6ae21cf19d..7cffb66b2d 100644 --- a/testing/adios2/engine/staging-common/TestData.h +++ b/testing/adios2/engine/staging-common/TestData.h @@ -71,16 +71,16 @@ void generateCommonTestData(int step, int rank, int size) data_I16[i] = (int16_t)(j + 10 * i); data_I32[i] = (int32_t)(j + 10 * i); data_I64[i] = (int64_t)(j + 10 * i); - data_R32[i] = j + 10 * i; - data_R64[i] = j + 10 * i; 
- data_C32[i].imag(j + 10 * i); - data_C32[i].real(-(j + 10 * i)); - data_C64[i].imag(j + 10 * i); - data_C64[i].real(-(j + 10 * i)); - data_R64_2d[i][0] = j + 10 * i; - data_R64_2d[i][1] = 10000 + j + 10 * i; - data_R64_2d_rev[0][i] = j + 10 * i; - data_R64_2d_rev[1][i] = 10000 + j + 10 * i; + data_R32[i] = (float)j + 10 * i; + data_R64[i] = (double)j + 10 * i; + data_C32[i].imag((float)j + 10 * i); + data_C32[i].real((float)-(j + 10 * i)); + data_C64[i].imag((double)j + 10 * i); + data_C64[i].real((double)-(j + 10 * i)); + data_R64_2d[i][0] = (double)j + 10 * i; + data_R64_2d[i][1] = (double)10000 + j + 10 * i; + data_R64_2d_rev[0][i] = (double)j + 10 * i; + data_R64_2d_rev[1][i] = (double)10000 + j + 10 * i; } } From 24fb740694c1d1e5706af79ba564907c990df912 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Thu, 14 Feb 2019 20:19:12 -0500 Subject: [PATCH 06/16] Try to fix VC++ compilation. --- testing/adios2/engine/staging-common/TestCommonWrite.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/adios2/engine/staging-common/TestCommonWrite.cpp b/testing/adios2/engine/staging-common/TestCommonWrite.cpp index ea922508c5..2f790fecbd 100644 --- a/testing/adios2/engine/staging-common/TestCommonWrite.cpp +++ b/testing/adios2/engine/staging-common/TestCommonWrite.cpp @@ -149,7 +149,7 @@ TEST_F(CommonWriteTest, ADIOS2CommonWrite) for (size_t step = 0; step < NSteps; ++step) { // Generate test data for each process uniquely - generateCommonTestData(step, mpiRank, mpiSize); + generateCommonTestData((int)step, mpiRank, mpiSize); engine.BeginStep(); // Retrieve the variables that previously went out of scope From 5e1d63c69edd9377707fd92f6039b65f3d39a6be Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Fri, 15 Feb 2019 06:22:04 -0500 Subject: [PATCH 07/16] Remove usleep for MSVC --- .../adios2/engine/staging-common/TestCommonClient.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git 
a/testing/adios2/engine/staging-common/TestCommonClient.cpp b/testing/adios2/engine/staging-common/TestCommonClient.cpp index 09ee13fc86..205f8dae1a 100644 --- a/testing/adios2/engine/staging-common/TestCommonClient.cpp +++ b/testing/adios2/engine/staging-common/TestCommonClient.cpp @@ -5,8 +5,10 @@ #include #include +#include #include #include +#include #include @@ -115,7 +117,7 @@ TEST_F(SstReadTest, ADIOS2SstRead) while (Status == adios2::StepStatus::NotReady) { BeginStepFailedPolls++; - usleep(1000 * 100); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); Status = engine.BeginStep(adios2::StepMode::NextAvailable, 0.0); } } @@ -124,7 +126,7 @@ TEST_F(SstReadTest, ADIOS2SstRead) if (LongFirstDelay) { LongFirstDelay = 0; - usleep(3 * 1000 * 1000); /* sleep for 3 seconds */ + std::this_thread::sleep_for(std::chrono::seconds(3)); } /* would like to do blocking, but API is inconvenient, so specify an * hour timeout */ @@ -155,7 +157,7 @@ TEST_F(SstReadTest, ADIOS2SstRead) EXPECT_EQ(currentStep, static_cast(t)); - int writerSize; + size_t writerSize; auto scalar_r64 = io.InquireVariable("scalar_r64"); EXPECT_TRUE(scalar_r64); @@ -306,7 +308,8 @@ TEST_F(SstReadTest, ADIOS2SstRead) } if (IncreasingDelay) { - usleep(1000 * DelayMS); /* sleep for DelayMS milliseconds */ + std::this_thread::sleep_for(std::chrono::milliseconds( + DelayMS)); /* sleep for DelayMS milliseconds */ DelayMS += 200; } } From ef5ac954774b8d6cb8fc20cd7a9e4fa9b35367fd Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Fri, 15 Feb 2019 06:30:30 -0500 Subject: [PATCH 08/16] Remove usleep for MSVC --- testing/adios2/engine/staging-common/TestCommonServer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/testing/adios2/engine/staging-common/TestCommonServer.cpp b/testing/adios2/engine/staging-common/TestCommonServer.cpp index 898166ac70..a7fd644bb0 100644 --- a/testing/adios2/engine/staging-common/TestCommonServer.cpp +++ 
b/testing/adios2/engine/staging-common/TestCommonServer.cpp @@ -7,10 +7,12 @@ #include #include +#include #include #include #include #include +#include #include @@ -196,7 +198,8 @@ TEST_F(CommonServerTest, ADIOS2CommonServer) std::time_t localtime = std::time(NULL); engine.Put(var_time, (int64_t *)&localtime); engine.EndStep(); - usleep(1000 * DelayMS); /* sleep for DelayMS milliseconds */ + std::this_thread::sleep_for(std::chrono::milliseconds( + DelayMS)); /* sleep for DelayMS milliseconds */ step++; #ifdef ADIOS2_HAVE_MPI MPI_Allreduce(&MyCloseNow, &GlobalCloseNow, 1, MPI_INT, MPI_LOR, From 03aa0db8afedaa8220fa814f60fffcfa39ea6c74 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Fri, 15 Feb 2019 06:46:02 -0500 Subject: [PATCH 09/16] add cast --- testing/adios2/engine/staging-common/TestCommonServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/adios2/engine/staging-common/TestCommonServer.cpp b/testing/adios2/engine/staging-common/TestCommonServer.cpp index a7fd644bb0..58198310c9 100644 --- a/testing/adios2/engine/staging-common/TestCommonServer.cpp +++ b/testing/adios2/engine/staging-common/TestCommonServer.cpp @@ -141,7 +141,7 @@ TEST_F(CommonServerTest, ADIOS2CommonServer) while ((std::time(NULL) < EndTime) && !GlobalCloseNow) { // Generate test data for each process uniquely - generateCommonTestData(step, mpiRank, mpiSize); + generateCommonTestData((int)step, mpiRank, mpiSize); engine.BeginStep(); // Retrieve the variables that previously went out of scope From 296f785bde3c18fddff51700ff6b55792b28c5b4 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Fri, 15 Feb 2019 06:48:13 -0500 Subject: [PATCH 10/16] add cast --- testing/adios2/engine/staging-common/TestCommonClient.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/testing/adios2/engine/staging-common/TestCommonClient.cpp b/testing/adios2/engine/staging-common/TestCommonClient.cpp index 205f8dae1a..e86f2b6869 100644 --- 
a/testing/adios2/engine/staging-common/TestCommonClient.cpp +++ b/testing/adios2/engine/staging-common/TestCommonClient.cpp @@ -223,13 +223,14 @@ TEST_F(SstReadTest, ADIOS2SstRead) ASSERT_EQ(var_time.ShapeID(), adios2::ShapeID::GlobalArray); ASSERT_EQ(var_time.Shape()[0], writerSize); - long unsigned int myStart = (writerSize * Nx / mpiSize) * mpiRank; + long unsigned int myStart = + (long unsigned int)(writerSize * Nx / mpiSize) * mpiRank; long unsigned int myLength = - ((writerSize * Nx + mpiSize - 1) / mpiSize); + (long unsigned int)((writerSize * Nx + mpiSize - 1) / mpiSize); if (myStart + myLength > writerSize * Nx) { - myLength = writerSize * Nx - myStart; + myLength = (long unsigned int)writerSize * Nx - myStart; } const adios2::Dims start{myStart}; const adios2::Dims count{myLength}; From 1e16f9fc1db6628a52f00aa853abfee633ffc6cd Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Fri, 15 Feb 2019 07:04:57 -0500 Subject: [PATCH 11/16] add cast --- testing/adios2/engine/staging-common/TestCommonRead.cpp | 2 +- testing/adios2/engine/staging-common/TestCommonWriteAttrs.cpp | 2 +- testing/adios2/engine/staging-common/TestCommonWriteModes.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/testing/adios2/engine/staging-common/TestCommonRead.cpp b/testing/adios2/engine/staging-common/TestCommonRead.cpp index 9431f3f87f..c6d2e126fd 100644 --- a/testing/adios2/engine/staging-common/TestCommonRead.cpp +++ b/testing/adios2/engine/staging-common/TestCommonRead.cpp @@ -103,7 +103,7 @@ TEST_F(CommonReadTest, ADIOS2CommonRead1D8) const size_t currentStep = engine.CurrentStep(); EXPECT_EQ(currentStep, static_cast(t)); - int writerSize; + size_t writerSize; auto scalar_r64 = io.InquireVariable("scalar_r64"); EXPECT_TRUE(scalar_r64); diff --git a/testing/adios2/engine/staging-common/TestCommonWriteAttrs.cpp b/testing/adios2/engine/staging-common/TestCommonWriteAttrs.cpp index ec86a5dbfb..19d4c0db8b 100644 --- 
a/testing/adios2/engine/staging-common/TestCommonWriteAttrs.cpp +++ b/testing/adios2/engine/staging-common/TestCommonWriteAttrs.cpp @@ -181,7 +181,7 @@ TEST_F(CommonWriteTest, ADIOS2CommonWrite) for (size_t step = 0; step < NSteps; ++step) { // Generate test data for each process uniquely - generateCommonTestData(step, mpiRank, mpiSize); + generateCommonTestData((int)step, mpiRank, mpiSize); engine.BeginStep(); // Retrieve the variables that previously went out of scope diff --git a/testing/adios2/engine/staging-common/TestCommonWriteModes.cpp b/testing/adios2/engine/staging-common/TestCommonWriteModes.cpp index d3a31f2e63..85259768f3 100644 --- a/testing/adios2/engine/staging-common/TestCommonWriteModes.cpp +++ b/testing/adios2/engine/staging-common/TestCommonWriteModes.cpp @@ -150,7 +150,7 @@ TEST_F(CommonWriteTest, ADIOS2CommonWrite) { adios2::Mode WriteMode; // Generate test data for each process uniquely - generateCommonTestData(step, mpiRank, mpiSize); + generateCommonTestData((int)step, mpiRank, mpiSize); engine.BeginStep(); // Retrieve the variables that previously went out of scope From ec42817a82ffab463b0c9c8d130db4993d7f2c05 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Fri, 15 Feb 2019 07:26:11 -0500 Subject: [PATCH 12/16] add cast --- testing/adios2/engine/staging-common/TestCommonRead.cpp | 7 ++++--- .../adios2/engine/staging-common/TestCommonReadAttrs.cpp | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/testing/adios2/engine/staging-common/TestCommonRead.cpp b/testing/adios2/engine/staging-common/TestCommonRead.cpp index c6d2e126fd..c090a52392 100644 --- a/testing/adios2/engine/staging-common/TestCommonRead.cpp +++ b/testing/adios2/engine/staging-common/TestCommonRead.cpp @@ -178,13 +178,14 @@ TEST_F(CommonReadTest, ADIOS2CommonRead1D8) ASSERT_EQ(var_time.ShapeID(), adios2::ShapeID::GlobalArray); ASSERT_EQ(var_time.Shape()[0], writerSize); - long unsigned int myStart = (writerSize * Nx / mpiSize) * mpiRank; + long 
unsigned int myStart = + (long unsigned int)(writerSize * Nx / mpiSize) * mpiRank; long unsigned int myLength = - ((writerSize * Nx + mpiSize - 1) / mpiSize); + (long unsigned int)((writerSize * Nx + mpiSize - 1) / mpiSize); if (myStart + myLength > writerSize * Nx) { - myLength = writerSize * Nx - myStart; + myLength = (long unsigned int)writerSize * Nx - myStart; } const adios2::Dims start{myStart}; const adios2::Dims count{myLength}; diff --git a/testing/adios2/engine/staging-common/TestCommonReadAttrs.cpp b/testing/adios2/engine/staging-common/TestCommonReadAttrs.cpp index daa3f29512..f799c6ea2c 100644 --- a/testing/adios2/engine/staging-common/TestCommonReadAttrs.cpp +++ b/testing/adios2/engine/staging-common/TestCommonReadAttrs.cpp @@ -116,7 +116,7 @@ TEST_F(CommonReadTest, ADIOS2CommonRead1D8) const size_t currentStep = engine.CurrentStep(); EXPECT_EQ(currentStep, static_cast(t)); - int writerSize; + size_t writerSize; auto attr_s1 = io.InquireAttribute(s1_Single); auto attr_s1a = io.InquireAttribute(s1_Array); @@ -278,13 +278,14 @@ TEST_F(CommonReadTest, ADIOS2CommonRead1D8) ASSERT_EQ(var_time.ShapeID(), adios2::ShapeID::GlobalArray); ASSERT_EQ(var_time.Shape()[0], writerSize); - long unsigned int myStart = (writerSize * Nx / mpiSize) * mpiRank; + long unsigned int myStart = + (long unsigned int)(writerSize * Nx / mpiSize) * mpiRank; long unsigned int myLength = - ((writerSize * Nx + mpiSize - 1) / mpiSize); + (long unsigned int)((writerSize * Nx + mpiSize - 1) / mpiSize); if (myStart + myLength > writerSize * Nx) { - myLength = writerSize * Nx - myStart; + myLength = (long unsigned int)writerSize * Nx - myStart; } const adios2::Dims start{myStart}; const adios2::Dims count{myLength}; From a2952b5e2cb306a834b39437d35d9ea769c2e608 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Thu, 14 Feb 2019 11:14:55 -0500 Subject: [PATCH 13/16] Tweak SST Data transport so that if a transport is unavailable, we don't use it even if preferred. 
Set SST Control transport to default to sockets. Establish nicer aliases for Control transports. Document control transport specification. --- docs/user_guide/source/engines/sst.rst | 15 ++++++++++- source/adios2/toolkit/sst/cp/cp_common.c | 32 +++++++++++++++++++++--- source/adios2/toolkit/sst/dp/dp.c | 27 ++++++++++++++++++-- 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/docs/user_guide/source/engines/sst.rst b/docs/user_guide/source/engines/sst.rst index 5e5a772be6..a572964184 100644 --- a/docs/user_guide/source/engines/sst.rst +++ b/docs/user_guide/source/engines/sst.rst @@ -152,7 +152,19 @@ applications running on different interconnects, the Wide Area Network (WAN) option should be chosen. This value is interpreted by both SST Writer and Reader engines. -5. **NetworkInterface**: Default **NULL**. In situations in which +6. **DataTransport**: Default **tcp**. This string value specifies +the underlying network communication mechanism to use for performing +control operations in SST. SST can be configured to standard TCP +sockets, which are very reliable and efficient, but which are limited +in their scalability. Alternatively, SST can use a reliable UDP +protocol, that is more scalable, but as of ADIOS2 Release 2.3.1 still +suffers from some reliability problems. (**sockets** is accepted as +equivalent to **tcp** and **udp**, **rudp**, and **enet** are +equivalent to **scalable**. Generally both the reader and writer +should be using the same control transport. This value is interpreted +by both SST Writer and Reader engines. + +7. **NetworkInterface**: Default **NULL**. In situations in which there are multiple possible network interfaces available to SST, this string value specifies which should be used to generate SST's contact information for writers. Generally this should *NOT* be specified @@ -173,5 +185,6 @@ This value is interpreted by only by the SST Writer engine. 
QueueLimit integer **0** (no queue limits) QueueFullPolicy string **Block**, Discard DataTransport string **default varies by platform**, RDMA, WAN + ControlTransport string **TCP**, Scalable NetworkInterface string **NULL** ======================= ===================== ========================================================= diff --git a/source/adios2/toolkit/sst/cp/cp_common.c b/source/adios2/toolkit/sst/cp/cp_common.c index 6f6d54ce45..83404c9370 100644 --- a/source/adios2/toolkit/sst/cp/cp_common.c +++ b/source/adios2/toolkit/sst/cp/cp_common.c @@ -42,11 +42,10 @@ void CP_validateParams(SstStream Stream, SstParams Params, int Writer) } Stream->QueueFullPolicy = Params->QueueFullPolicy; Stream->RegistrationMethod = Params->RegistrationMethod; - char *SelectedTransport = NULL; if (Params->DataTransport != NULL) { int i; - SelectedTransport = malloc(strlen(Params->DataTransport) + 1); + char *SelectedTransport = malloc(strlen(Params->DataTransport) + 1); for (i = 0; Params->DataTransport[i] != 0; i++) { SelectedTransport[i] = tolower(Params->DataTransport[i]); @@ -69,8 +68,33 @@ void CP_validateParams(SstStream Stream, SstParams Params, int Writer) } if (Params->ControlTransport == NULL) { - /* determine reasonable default, now "enet" */ - Params->ControlTransport = strdup("enet"); + /* determine reasonable default, now "sockets" */ + Params->ControlTransport = strdup("sockets"); + } + else + { + int i; + char *SelectedTransport = malloc(strlen(Params->ControlTransport) + 1); + for (i = 0; Params->ControlTransport[i] != 0; i++) + { + SelectedTransport[i] = tolower(Params->ControlTransport[i]); + } + SelectedTransport[i] = 0; + + /* canonicalize SelectedTransport */ + if ((strcmp(SelectedTransport, "sockets") == 0) || + (strcmp(SelectedTransport, "tcp") == 0)) + { + Params->ControlTransport = strdup("sockets"); + } + else if ((strcmp(SelectedTransport, "udp") == 0) || + (strcmp(SelectedTransport, "rudp") == 0) || + (strcmp(SelectedTransport, "scalable") == 0) || + 
(strcmp(SelectedTransport, "enet") == 0)) + { + Params->ControlTransport = strdup("enet"); + } + free(SelectedTransport); } Stream->ConnectionUsleepMultiplier = 50; if ((strcmp(Params->ControlTransport, "enet") == 0) && diff --git a/source/adios2/toolkit/sst/dp/dp.c b/source/adios2/toolkit/sst/dp/dp.c index 10bb37dbaa..e509bbdaf6 100644 --- a/source/adios2/toolkit/sst/dp/dp.c +++ b/source/adios2/toolkit/sst/dp/dp.c @@ -64,6 +64,7 @@ CP_DP_Interface SelectDP(CP_Services Svcs, void *CP_Stream, int BestPriority = -1; int BestPrioDP = -1; int i = 0; + int FoundPreferred = 0; if (Params->DataTransport) { Svcs->verbose(CP_Stream, "Prefered dataplane name is \"%s\"\n", @@ -79,8 +80,18 @@ CP_DP_Interface SelectDP(CP_Services Svcs, void *CP_Stream, { if (strcasecmp(List[i].Name, Params->DataTransport) == 0) { - SelectedDP = i; - break; + FoundPreferred = 1; + if (List[i].Priority >= 0) + { + SelectedDP = i; + break; + } + else + { + fprintf(stderr, "Warning: Perferred DataPlane \"%s\" is " + "not available.", + List[i].Name); + } } } if (List[i].Priority > BestPriority) @@ -90,6 +101,11 @@ CP_DP_Interface SelectDP(CP_Services Svcs, void *CP_Stream, } i++; } + if (Params->DataTransport && (FoundPreferred == 0)) + { + fprintf(stderr, "Warning: Preferred DataPlane \"%s\" not found.", + Params->DataTransport); + } if (SelectedDP != -1) { Svcs->verbose(CP_Stream, @@ -115,6 +131,13 @@ CP_DP_Interface SelectDP(CP_Services Svcs, void *CP_Stream, } i++; } + + if (Params->DataTransport) + { + free(Params->DataTransport); + } + Params->DataTransport = strdup(List[SelectedDP].Name); + Ret = List[SelectedDP].Interface; free(List); return Ret; From 5db447da1c0202014c7f86e2421c4948a51096e1 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Sun, 17 Feb 2019 15:14:48 -0500 Subject: [PATCH 14/16] Clean up queue handling in GlobalOp to produce closer to expected results for Discard on queue full --- source/adios2/toolkit/sst/cp/cp_writer.c | 62 +++++++++++++++++++++--- 1 file changed, 54 
insertions(+), 8 deletions(-) diff --git a/source/adios2/toolkit/sst/cp/cp_writer.c b/source/adios2/toolkit/sst/cp/cp_writer.c index 0ac9f47ca7..93db11efc7 100644 --- a/source/adios2/toolkit/sst/cp/cp_writer.c +++ b/source/adios2/toolkit/sst/cp/cp_writer.c @@ -1154,9 +1154,14 @@ static void DoWriterSideGlobalOp(SstStream Stream, int *DiscardIncomingTimestep) ArrivingReader = ArrivingReader->Next; } if (Stream->QueueLimit && - (Stream->QueuedTimestepCount > Stream->QueueLimit)) + ((Stream->QueuedTimestepCount + 1) > Stream->QueueLimit)) { - SendBlock[1] = 1; /* this rank is over stream limit */ + CP_verbose(Stream, + "Writer will be over queue limit, count %d, limit %d\n", + Stream->QueuedTimestepCount, Stream->QueueLimit); + + SendBlock[1] = + 1; /* this rank will be over queue limit with new timestep */ } else { @@ -1207,10 +1212,6 @@ static void DoWriterSideGlobalOp(SstStream Stream, int *DiscardIncomingTimestep) ActiveReaderCount++; } - /* - * Then handle possible incoming connection requests. (Only rank 0 has - * valid info.) - */ int OverLimit = 0; for (int i = 0; i < Stream->CohortSize; i++) { @@ -1220,6 +1221,47 @@ static void DoWriterSideGlobalOp(SstStream Stream, int *DiscardIncomingTimestep) /* we've got all the state we need to know, release data lock */ PTHREAD_MUTEX_UNLOCK(&Stream->DataLock); + /* + * before we add any new readers, see if we are going to need to + * discard anything in the queue to stay in the queue limit + */ + + if (OverLimit) + { + if (Stream->QueueFullPolicy == SstQueueFullDiscard) + { + /* discard things */ + if (ActiveReaderCount == 0) + { + PTHREAD_MUTEX_LOCK(&Stream->DataLock); + /* + * Have to double-check here. While in general, if everyone + * had zero readers at the start, everyone should have the + * same set of timesteps queued and everyone should be doing + * a dequeue here. However, we might have just gone to zero + * active (because of failed or exiting readers), and some + * timesteps might just have been released. 
(Note: Assuming + * that if having gone to zero readers will have dequeued + * timesteps that had reference counts. So generally we + * must be dequeueing things with a zero reference count + * here.) That is an assumption that should not be + * violated. + */ + if ((Stream->QueuedTimestepCount + 1) >= Stream->QueueLimit) + { + CP_verbose(Stream, "Writer doing discard for overlimit\n"); + DoStreamDiscard(Stream); + OverLimit = 0; /* handled */ + } + PTHREAD_MUTEX_UNLOCK(&Stream->DataLock); + } + } + } + + /* + * Then handle possible incoming connection requests. (Only rank 0 has + * valid info, so only look to RecvBlock[0].) + */ for (int i = 0; i < RecvBlock[0]; i++) { WS_ReaderInfo reader; @@ -1235,7 +1277,9 @@ static void DoWriterSideGlobalOp(SstStream Stream, int *DiscardIncomingTimestep) } /* - * Lastly, we'll decide what to do with the current provided timestep. + * Lastly, we'll decide what to do with the current provided timestep, + * (if it was not discarded before we added new readers). 
+ * * If any writer rank is over the queuelimit, we must discard or block * decision points: If mode is block on queue limit: @@ -1307,7 +1351,9 @@ static void DoWriterSideGlobalOp(SstStream Stream, int *DiscardIncomingTimestep) /* discard things */ if (ActiveReaderCount == 0) { - DoStreamDiscard(Stream); + /* this should have been handled above */ + CP_verbose(Stream, "Finding a late need to discard when Active " + "Readers is zero, shouldn't happen!!\n\n"); } else { From 1e9ce2c0ab05540238bb088e04b02b898f1c968c Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Sun, 17 Feb 2019 18:37:02 -0500 Subject: [PATCH 15/16] Bump timeout on TimeoutReader test because it's close --- testing/adios2/engine/staging-common/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/testing/adios2/engine/staging-common/CMakeLists.txt b/testing/adios2/engine/staging-common/CMakeLists.txt index 7463121616..2ceb5f37ef 100644 --- a/testing/adios2/engine/staging-common/CMakeLists.txt +++ b/testing/adios2/engine/staging-common/CMakeLists.txt @@ -137,6 +137,7 @@ set (KillReadersSerialized_TIMEOUT "300") set (KillReaders3Max_CMD "run_multi_test -test_protocol kill_readers -verbose -nw 3 -nr 2 -max_readers 3 -warg RendezvousReaderCount:0,ControlTransport:sockets -rarg --ignore_time_gap") set (KillReaders3Max_TIMEOUT "300") set (TimeoutReader_CMD "run_multi_test -test_protocol one_to_one -verbose -nw 1 -nr 1 -max_readers 1 -rarg --non_blocking -warg --ms_delay -warg 2000") +set (KillReadersSerialized_TIMEOUT "60") set (LatestReader.FFS_CMD "run_multi_test -test_protocol one_to_one -verbose -nw 1 -nr 1 -max_readers 1 -warg --ms_delay -warg 250 -warg --engine_params -warg MarshalMethod:FFS -rarg --latest -rarg --long_first_delay") set (LatestReader.BP_CMD "run_multi_test -test_protocol one_to_one -verbose -nw 1 -nr 1 -max_readers 1 -warg --ms_delay -warg 250 -warg --engine_params -warg MarshalMethod:BP -rarg --latest -rarg --long_first_delay") set (DiscardWriter_CMD "run_multi_test 
-test_protocol one_to_one -verbose -nw 1 -nr 1 -max_readers 1 -warg --engine_params -warg QueueLimit:1,QueueFullPolicy:discard -warg --ms_delay -warg 500 -rarg --discard") From 13720a29b9ba976961bab2e71bde45711330096a Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Sun, 17 Feb 2019 20:11:40 -0500 Subject: [PATCH 16/16] Bump timeout on TimeoutReader test because it's close --- testing/adios2/engine/staging-common/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/adios2/engine/staging-common/CMakeLists.txt b/testing/adios2/engine/staging-common/CMakeLists.txt index 2ceb5f37ef..573cffc2ee 100644 --- a/testing/adios2/engine/staging-common/CMakeLists.txt +++ b/testing/adios2/engine/staging-common/CMakeLists.txt @@ -137,7 +137,7 @@ set (KillReadersSerialized_TIMEOUT "300") set (KillReaders3Max_CMD "run_multi_test -test_protocol kill_readers -verbose -nw 3 -nr 2 -max_readers 3 -warg RendezvousReaderCount:0,ControlTransport:sockets -rarg --ignore_time_gap") set (KillReaders3Max_TIMEOUT "300") set (TimeoutReader_CMD "run_multi_test -test_protocol one_to_one -verbose -nw 1 -nr 1 -max_readers 1 -rarg --non_blocking -warg --ms_delay -warg 2000") -set (KillReadersSerialized_TIMEOUT "60") +set (TimeoutReader_TIMEOUT "60") set (LatestReader.FFS_CMD "run_multi_test -test_protocol one_to_one -verbose -nw 1 -nr 1 -max_readers 1 -warg --ms_delay -warg 250 -warg --engine_params -warg MarshalMethod:FFS -rarg --latest -rarg --long_first_delay") set (LatestReader.BP_CMD "run_multi_test -test_protocol one_to_one -verbose -nw 1 -nr 1 -max_readers 1 -warg --ms_delay -warg 250 -warg --engine_params -warg MarshalMethod:BP -rarg --latest -rarg --long_first_delay") set (DiscardWriter_CMD "run_multi_test -test_protocol one_to_one -verbose -nw 1 -nr 1 -max_readers 1 -warg --engine_params -warg QueueLimit:1,QueueFullPolicy:discard -warg --ms_delay -warg 500 -rarg --discard")