Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

An effort to make profiler more user friendly #3660

Merged
merged 1 commit into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 25 additions & 21 deletions source/adios2/engine/bp5/BP5Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ StepStatus BP5Writer::BeginStep(StepMode mode, const float timeoutSeconds)
TimePoint wait_start = Now();
if (m_WriteFuture.valid())
{
m_Profiler.Start("WaitOnAsync");
m_Profiler.Start("BS_WaitOnAsync");
m_WriteFuture.get();
m_Comm.Barrier();
AsyncWriteDataCleanup();
Expand All @@ -110,7 +110,7 @@ StepStatus BP5Writer::BeginStep(StepMode mode, const float timeoutSeconds)
<< std::endl;
}
}
m_Profiler.Stop("WaitOnAsync");
m_Profiler.Stop("BS_WaitOnAsync");
}
}

Expand Down Expand Up @@ -529,9 +529,9 @@ void BP5Writer::EndStep()
std::cout << "END STEP starts at: " << ts.count() << std::endl; */
m_BetweenStepPairs = false;
PERFSTUBS_SCOPED_TIMER("BP5Writer::EndStep");
m_Profiler.Start("endstep");
m_Profiler.Start("ES");

m_Profiler.Start("close_ts");
m_Profiler.Start("ES_close");
MarshalAttributes();

// true: advances step
Expand All @@ -542,9 +542,9 @@ void BP5Writer::EndStep()
* AttributeEncodeBuffer and the data encode Vector */

m_ThisTimestepDataSize += TSInfo.DataBuffer->Size();
m_Profiler.Stop("close_ts");
m_Profiler.Stop("ES_close");

m_Profiler.Start("AWD");
m_Profiler.Start("ES_AWD");
// TSInfo destructor would delete the DataBuffer so we need to save it
// for async IO and let the writer free it up when not needed anymore
m_AsyncWriteLock.lock();
Expand All @@ -555,12 +555,12 @@ void BP5Writer::EndStep()
WriteData(TSInfo.DataBuffer);
TSInfo.DataBuffer = NULL;

m_Profiler.Stop("AWD");
m_Profiler.Stop("ES_AWD");

/*
* Two-step metadata aggregation
*/
m_Profiler.Start("meta_lvl1");
m_Profiler.Start("ES_meta1");
std::vector<char> MetaBuffer;
core::iovec m{TSInfo.MetaEncodeBuffer->Data(),
TSInfo.MetaEncodeBuffer->m_FixedSize};
Expand All @@ -576,7 +576,7 @@ void BP5Writer::EndStep()

if (m_Aggregator->m_Comm.Size() > 1)
{ // level 1
m_Profiler.Start("meta_gather1");
m_Profiler.Start("ES_meta1_gather");
size_t LocalSize = MetaBuffer.size();
std::vector<size_t> RecvCounts =
m_Aggregator->m_Comm.GatherValues(LocalSize, 0);
Expand All @@ -594,7 +594,7 @@ void BP5Writer::EndStep()
m_Aggregator->m_Comm.GathervArrays(MetaBuffer.data(), LocalSize,
RecvCounts.data(), RecvCounts.size(),
RecvBuffer.data(), 0);
m_Profiler.Stop("meta_gather1");
m_Profiler.Stop("ES_meta1_gather");
if (m_Aggregator->m_Comm.Rank() == 0)
{
std::vector<format::BP5Base::MetaMetaInfoBlock>
Expand All @@ -612,8 +612,8 @@ void BP5Writer::EndStep()
WriterDataPositions);
}
} // level 1
m_Profiler.Stop("meta_lvl1");
m_Profiler.Start("meta_lvl2");
m_Profiler.Stop("ES_meta1");
m_Profiler.Start("ES_meta2");
// level 2
if (m_Aggregator->m_Comm.Rank() == 0)
{
Expand All @@ -623,7 +623,7 @@ void BP5Writer::EndStep()
size_t LocalSize = MetaBuffer.size();
if (m_CommAggregators.Size() > 1)
{
m_Profiler.Start("meta_gather2");
m_Profiler.Start("ES_meta2_gather");
RecvCounts = m_CommAggregators.GatherValues(LocalSize, 0);
if (m_CommAggregators.Rank() == 0)
{
Expand All @@ -640,7 +640,7 @@ void BP5Writer::EndStep()
MetaBuffer.data(), LocalSize, RecvCounts.data(),
RecvCounts.size(), RecvBuffer.data(), 0);
buf = &RecvBuffer;
m_Profiler.Stop("meta_gather2");
m_Profiler.Stop("ES_meta2_gather");
}
else
{
Expand Down Expand Up @@ -670,7 +670,7 @@ void BP5Writer::EndStep()
}
}
} // level 2
m_Profiler.Stop("meta_lvl2");
m_Profiler.Stop("ES_meta2");

if (m_Parameters.AsyncWrite)
{
Expand All @@ -685,7 +685,7 @@ void BP5Writer::EndStep()
}
}

m_Profiler.Stop("endstep");
m_Profiler.Stop("ES");
m_WriterStep++;
m_EndStepEnd = Now();
/* Seconds ts2 = Now() - m_EngineStart;
Expand Down Expand Up @@ -1555,7 +1555,11 @@ void BP5Writer::FlushData(const bool isFinal)

void BP5Writer::Flush(const int transportIndex) {}

void BP5Writer::PerformDataWrite() { FlushData(false); }
void BP5Writer::PerformDataWrite() {
m_Profiler.Start("PDW");
FlushData(false);
m_Profiler.Stop("PDW");
}

void BP5Writer::DestructorClose(bool Verbose) noexcept
{
Expand Down Expand Up @@ -1597,13 +1601,13 @@ void BP5Writer::DoClose(const int transportIndex)
Seconds wait(0.0);
if (m_WriteFuture.valid())
{
m_Profiler.Start("WaitOnAsync");
m_Profiler.Start("DC_WaitOnAsync1");
m_AsyncWriteLock.lock();
m_flagRush = true;
m_AsyncWriteLock.unlock();
m_WriteFuture.get();
wait += Now() - wait_start;
m_Profiler.Stop("WaitOnAsync");
m_Profiler.Stop("DC_WaitOnAsync1");
}

m_FileDataManager.CloseFiles(transportIndex);
Expand All @@ -1621,7 +1625,7 @@ void BP5Writer::DoClose(const int transportIndex)
if (m_Parameters.AsyncWrite)
{
// wait until all process' writing thread completes
m_Profiler.Start("WaitOnAsync");
m_Profiler.Start("DC_WaitOnAsync2");
wait_start = Now();
m_Comm.Barrier();
AsyncWriteDataCleanup();
Expand All @@ -1631,7 +1635,7 @@ void BP5Writer::DoClose(const int transportIndex)
std::cout << "Close waited " << wait.count()
<< " seconds on async threads" << std::endl;
}
m_Profiler.Stop("WaitOnAsync");
m_Profiler.Stop("DC_WaitOnAsync2");
}

if (m_Comm.Rank() == 0)
Expand Down
26 changes: 16 additions & 10 deletions source/adios2/toolkit/profiling/iochrono/IOChrono.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,21 @@ JSONProfiler::JSONProfiler(helper::Comm const &comm) : m_Comm(comm)
m_Profiler.m_IsActive = true; // default is true

AddTimerWatch("buffering");
AddTimerWatch("endstep");
AddTimerWatch("ES");
AddTimerWatch("PP");
AddTimerWatch("meta_gather1", false);
AddTimerWatch("meta_gather2", false);
AddTimerWatch("meta_lvl1");
AddTimerWatch("meta_lvl2");
AddTimerWatch("close_ts");
AddTimerWatch("AWD");
AddTimerWatch("ES_meta1_gather", false);
AddTimerWatch("ES_meta2_gather", false);

AddTimerWatch("ES_meta1");
AddTimerWatch("ES_meta2");

AddTimerWatch("ES_close");
AddTimerWatch("ES_AWD");
AddTimerWatch("WaitOnAsync");
AddTimerWatch("BS_WaitOnAsync");
AddTimerWatch("DC_WaitOnAsync1");
AddTimerWatch("DC_WaitOnAsync2");
AddTimerWatch("PDW");

m_Profiler.m_Bytes.emplace("buffering", 0);

Expand Down Expand Up @@ -83,9 +89,6 @@ std::string JSONProfiler::GetRankProfilingJSON(
std::replace(timeDate.begin(), timeDate.end(), ' ', '_');

rankLog += ", \"start\":\"" + timeDate + "\"";
// rankLog += ", \"threads\":" + std::to_string(m_Parameters.Threads);
rankLog +=
", \"bytes\":" + std::to_string(profiler.m_Bytes.at("buffering"));

for (const auto &timerPair : profiler.m_Timers)
{
Expand All @@ -102,6 +105,9 @@ std::string JSONProfiler::GetRankProfilingJSON(
rankLog += ", \"transport_" + std::to_string(t) + "\":{";
rankLog += "\"type\":\"" + transportsTypes[t] + "\"";

size_t wBytes = transportsProfilers[t]->m_Bytes.at("write");
rankLog += ", \"wbytes\":" + std::to_string(wBytes);

for (const auto &transportTimerPair : transportsProfilers[t]->m_Timers)
{
lf_WriterTimer(rankLog, transportTimerPair.second);
Expand Down
8 changes: 8 additions & 0 deletions source/adios2/toolkit/transport/Transport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ void Transport::Flush()

size_t Transport::GetSize() { return 0; }

void Transport::ProfilerWriteBytes(size_t bytes) noexcept
{
if (m_Profiler.m_IsActive)
{
m_Profiler.m_Bytes.at("write") += bytes;
}
}

void Transport::ProfilerStart(const std::string process) noexcept
{
if (m_Profiler.m_IsActive)
Expand Down
1 change: 1 addition & 0 deletions source/adios2/toolkit/transport/Transport.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ class Transport
virtual void MkDir(const std::string &fileName) = 0;

protected:
void ProfilerWriteBytes(size_t bytes) noexcept;
void ProfilerStart(const std::string process) noexcept;

void ProfilerStop(const std::string process) noexcept;
Expand Down
3 changes: 3 additions & 0 deletions source/adios2/toolkit/transport/file/FilePOSIX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ void FilePOSIX::Write(const char *buffer, size_t size, size_t start)
while (size > 0)
{
ProfilerStart("write");
ProfilerWriteBytes(size);
errno = 0;
const auto writtenSize = write(m_FileDescriptor, buffer, size);
m_Errno = errno;
Expand Down Expand Up @@ -350,6 +351,8 @@ void FilePOSIX::WriteV(const core::iovec *iov, const int iovcnt, size_t start)
written = static_cast<size_t>(ret);
}

ProfilerWriteBytes(written);

if (written < nBytesExpected)
{
/* Fall back to write calls with individual buffers */
Expand Down