From 8c0e722b67f06a31ee380541f09e5e646a4651b3 Mon Sep 17 00:00:00 2001 From: Junmin Gu Date: Tue, 13 Jun 2023 14:38:38 -0400 Subject: [PATCH] renamed profiler entries in BP5 added bytes account for posix transport --- source/adios2/engine/bp5/BP5Writer.cpp | 46 ++++++++++--------- .../toolkit/profiling/iochrono/IOChrono.cpp | 26 +++++++---- source/adios2/toolkit/transport/Transport.cpp | 8 ++++ source/adios2/toolkit/transport/Transport.h | 1 + .../toolkit/transport/file/FilePOSIX.cpp | 3 ++ 5 files changed, 53 insertions(+), 31 deletions(-) diff --git a/source/adios2/engine/bp5/BP5Writer.cpp b/source/adios2/engine/bp5/BP5Writer.cpp index 6c8e9293e6..1deb7090cb 100644 --- a/source/adios2/engine/bp5/BP5Writer.cpp +++ b/source/adios2/engine/bp5/BP5Writer.cpp @@ -91,7 +91,7 @@ StepStatus BP5Writer::BeginStep(StepMode mode, const float timeoutSeconds) TimePoint wait_start = Now(); if (m_WriteFuture.valid()) { - m_Profiler.Start("WaitOnAsync"); + m_Profiler.Start("BS_WaitOnAsync"); m_WriteFuture.get(); m_Comm.Barrier(); AsyncWriteDataCleanup(); @@ -110,7 +110,7 @@ StepStatus BP5Writer::BeginStep(StepMode mode, const float timeoutSeconds) << std::endl; } } - m_Profiler.Stop("WaitOnAsync"); + m_Profiler.Stop("BS_WaitOnAsync"); } } @@ -529,9 +529,9 @@ void BP5Writer::EndStep() std::cout << "END STEP starts at: " << ts.count() << std::endl; */ m_BetweenStepPairs = false; PERFSTUBS_SCOPED_TIMER("BP5Writer::EndStep"); - m_Profiler.Start("endstep"); + m_Profiler.Start("ES"); - m_Profiler.Start("close_ts"); + m_Profiler.Start("ES_close"); MarshalAttributes(); // true: advances step @@ -542,9 +542,9 @@ void BP5Writer::EndStep() * AttributeEncodeBuffer and the data encode Vector */ m_ThisTimestepDataSize += TSInfo.DataBuffer->Size(); - m_Profiler.Stop("close_ts"); + m_Profiler.Stop("ES_close"); - m_Profiler.Start("AWD"); + m_Profiler.Start("ES_AWD"); // TSInfo destructor would delete the DataBuffer so we need to save it // for async IO and let the writer free it up when not needed anymore m_AsyncWriteLock.lock(); @@ -555,12 +555,12 @@ void BP5Writer::EndStep() WriteData(TSInfo.DataBuffer); TSInfo.DataBuffer = NULL; - m_Profiler.Stop("AWD"); + m_Profiler.Stop("ES_AWD"); /* * Two-step metadata aggregation */ - m_Profiler.Start("meta_lvl1"); + m_Profiler.Start("ES_meta1"); std::vector MetaBuffer; core::iovec m{TSInfo.MetaEncodeBuffer->Data(), TSInfo.MetaEncodeBuffer->m_FixedSize}; @@ -576,7 +576,7 @@ void BP5Writer::EndStep() if (m_Aggregator->m_Comm.Size() > 1) { // level 1 - m_Profiler.Start("meta_gather1"); + m_Profiler.Start("ES_meta1_gather"); size_t LocalSize = MetaBuffer.size(); std::vector RecvCounts = m_Aggregator->m_Comm.GatherValues(LocalSize, 0); @@ -594,7 +594,7 @@ void BP5Writer::EndStep() m_Aggregator->m_Comm.GathervArrays(MetaBuffer.data(), LocalSize, RecvCounts.data(), RecvCounts.size(), RecvBuffer.data(), 0); - m_Profiler.Stop("meta_gather1"); + m_Profiler.Stop("ES_meta1_gather"); if (m_Aggregator->m_Comm.Rank() == 0) { std::vector @@ -612,8 +612,8 @@ void BP5Writer::EndStep() WriterDataPositions); } } // level 1 - m_Profiler.Stop("meta_lvl1"); - m_Profiler.Start("meta_lvl2"); + m_Profiler.Stop("ES_meta1"); + m_Profiler.Start("ES_meta2"); // level 2 if (m_Aggregator->m_Comm.Rank() == 0) { @@ -623,7 +623,7 @@ void BP5Writer::EndStep() size_t LocalSize = MetaBuffer.size(); if (m_CommAggregators.Size() > 1) { - m_Profiler.Start("meta_gather2"); + m_Profiler.Start("ES_meta2_gather"); RecvCounts = m_CommAggregators.GatherValues(LocalSize, 0); if (m_CommAggregators.Rank() == 0) { @@ -640,7 +640,7 @@ void BP5Writer::EndStep() MetaBuffer.data(), LocalSize, RecvCounts.data(), RecvCounts.size(), RecvBuffer.data(), 0); buf = &RecvBuffer; - m_Profiler.Stop("meta_gather2"); + m_Profiler.Stop("ES_meta2_gather"); } else { @@ -670,7 +670,7 @@ void BP5Writer::EndStep() } } } // level 2 - m_Profiler.Stop("meta_lvl2"); + m_Profiler.Stop("ES_meta2"); if (m_Parameters.AsyncWrite) { @@ -685,7 +685,7 @@ void BP5Writer::EndStep() } } - m_Profiler.Stop("endstep"); + m_Profiler.Stop("ES"); m_WriterStep++; m_EndStepEnd = Now(); /* Seconds ts2 = Now() - m_EngineStart; @@ -1555,7 +1555,11 @@ void BP5Writer::FlushData(const bool isFinal) void BP5Writer::Flush(const int transportIndex) {} -void BP5Writer::PerformDataWrite() { FlushData(false); } +void BP5Writer::PerformDataWrite() { + m_Profiler.Start("PDW"); + FlushData(false); + m_Profiler.Stop("PDW"); +} void BP5Writer::DestructorClose(bool Verbose) noexcept { @@ -1597,13 +1601,13 @@ void BP5Writer::DoClose(const int transportIndex) Seconds wait(0.0); if (m_WriteFuture.valid()) { - m_Profiler.Start("WaitOnAsync"); + m_Profiler.Start("DC_WaitOnAsync1"); m_AsyncWriteLock.lock(); m_flagRush = true; m_AsyncWriteLock.unlock(); m_WriteFuture.get(); wait += Now() - wait_start; - m_Profiler.Stop("WaitOnAsync"); + m_Profiler.Stop("DC_WaitOnAsync1"); } m_FileDataManager.CloseFiles(transportIndex); @@ -1621,7 +1625,7 @@ void BP5Writer::DoClose(const int transportIndex) if (m_Parameters.AsyncWrite) { // wait until all process' writing thread completes - m_Profiler.Start("WaitOnAsync"); + m_Profiler.Start("DC_WaitOnAsync2"); wait_start = Now(); m_Comm.Barrier(); AsyncWriteDataCleanup(); @@ -1631,7 +1635,7 @@ void BP5Writer::DoClose(const int transportIndex) std::cout << "Close waited " << wait.count() << " seconds on async threads" << std::endl; } - m_Profiler.Stop("WaitOnAsync"); + m_Profiler.Stop("DC_WaitOnAsync2"); } if (m_Comm.Rank() == 0) diff --git a/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp b/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp index 377568deed..ec139cfbe7 100644 --- a/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp +++ b/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp @@ -40,15 +40,21 @@ JSONProfiler::JSONProfiler(helper::Comm const &comm) : m_Comm(comm) m_Profiler.m_IsActive = true; // default is true AddTimerWatch("buffering"); - AddTimerWatch("endstep"); + AddTimerWatch("ES"); AddTimerWatch("PP"); - AddTimerWatch("meta_gather1", false); - AddTimerWatch("meta_gather2", false); - AddTimerWatch("meta_lvl1"); - AddTimerWatch("meta_lvl2"); - AddTimerWatch("close_ts"); - AddTimerWatch("AWD"); + AddTimerWatch("ES_meta1_gather", false); + AddTimerWatch("ES_meta2_gather", false); + + AddTimerWatch("ES_meta1"); + AddTimerWatch("ES_meta2"); + + AddTimerWatch("ES_close"); + AddTimerWatch("ES_AWD"); AddTimerWatch("WaitOnAsync"); + AddTimerWatch("BS_WaitOnAsync"); + AddTimerWatch("DC_WaitOnAsync1"); + AddTimerWatch("DC_WaitOnAsync2"); + AddTimerWatch("PDW"); m_Profiler.m_Bytes.emplace("buffering", 0); @@ -83,9 +89,6 @@ std::string JSONProfiler::GetRankProfilingJSON( std::replace(timeDate.begin(), timeDate.end(), ' ', '_'); rankLog += ", \"start\":\"" + timeDate + "\""; - // rankLog += ", \"threads\":" + std::to_string(m_Parameters.Threads); - rankLog += - ", \"bytes\":" + std::to_string(profiler.m_Bytes.at("buffering")); for (const auto &timerPair : profiler.m_Timers) { @@ -102,6 +105,9 @@ std::string JSONProfiler::GetRankProfilingJSON( rankLog += ", \"transport_" + std::to_string(t) + "\":{"; rankLog += "\"type\":\"" + transportsTypes[t] + "\""; + size_t wBytes = transportsProfilers[t]->m_Bytes.at("write"); + rankLog += ", \"wbytes\":" + std::to_string(wBytes); + for (const auto &transportTimerPair : transportsProfilers[t]->m_Timers) { lf_WriterTimer(rankLog, transportTimerPair.second); diff --git a/source/adios2/toolkit/transport/Transport.cpp b/source/adios2/toolkit/transport/Transport.cpp index 2d48401349..024247afab 100644 --- a/source/adios2/toolkit/transport/Transport.cpp +++ b/source/adios2/toolkit/transport/Transport.cpp @@ -107,6 +107,14 @@ void Transport::Flush() size_t Transport::GetSize() { return 0; } +void Transport::ProfilerWriteBytes(size_t bytes) noexcept +{ + if (m_Profiler.m_IsActive) + { + m_Profiler.m_Bytes.at("write") += bytes; + } +} + void Transport::ProfilerStart(const std::string process) noexcept { if (m_Profiler.m_IsActive) diff --git a/source/adios2/toolkit/transport/Transport.h b/source/adios2/toolkit/transport/Transport.h index 4511155355..30246cdcd8 100644 --- a/source/adios2/toolkit/transport/Transport.h +++ b/source/adios2/toolkit/transport/Transport.h @@ -155,6 +155,7 @@ class Transport virtual void MkDir(const std::string &fileName) = 0; protected: + void ProfilerWriteBytes(size_t bytes) noexcept; void ProfilerStart(const std::string process) noexcept; void ProfilerStop(const std::string process) noexcept; diff --git a/source/adios2/toolkit/transport/file/FilePOSIX.cpp b/source/adios2/toolkit/transport/file/FilePOSIX.cpp index bf41198508..3e86eb6d82 100644 --- a/source/adios2/toolkit/transport/file/FilePOSIX.cpp +++ b/source/adios2/toolkit/transport/file/FilePOSIX.cpp @@ -256,6 +256,7 @@ void FilePOSIX::Write(const char *buffer, size_t size, size_t start) while (size > 0) { ProfilerStart("write"); + ProfilerWriteBytes(size); errno = 0; const auto writtenSize = write(m_FileDescriptor, buffer, size); m_Errno = errno; @@ -350,6 +351,8 @@ void FilePOSIX::WriteV(const core::iovec *iov, const int iovcnt, size_t start) written = static_cast(ret); } + ProfilerWriteBytes(written); + if (written < nBytesExpected) { /* Fall back to write calls with individual buffers */