Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce celer-sim memory usage and improve its CELER_LOG transport output #1550

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
1 change: 1 addition & 0 deletions app/celer-sim/Runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ void Runner::build_transporter_input(RunnerInput const& inp)
transporter_input_->store_step_times = inp.write_step_times;
transporter_input_->action_times = inp.action_times;
transporter_input_->params = core_params_;
transporter_input_->print_progress = inp.print_progress;
}

//---------------------------------------------------------------------------//
Expand Down
2 changes: 2 additions & 0 deletions app/celer-sim/RunnerInput.hh
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ struct RunnerInput
std::string slot_diagnostic_prefix; //!< Base name for slot diagnostic
bool write_track_counts{true}; //!< Output track counts for each step
bool write_step_times{true}; //!< Output elapsed times for each step
bool transporter_result{true}; //!< Output transporter result event data
size_type print_progress{}; //!< CELER_LOG progress every N events

// Control
unsigned int seed{};
Expand Down
4 changes: 4 additions & 0 deletions app/celer-sim/RunnerInputIO.json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ void from_json(nlohmann::json const& j, RunnerInput& v)
LDIO_LOAD_OPTION(slot_diagnostic_prefix);
LDIO_LOAD_OPTION(write_track_counts);
LDIO_LOAD_OPTION(write_step_times);
LDIO_LOAD_OPTION(transporter_result);
LDIO_LOAD_OPTION(print_progress);

LDIO_LOAD_DEPRECATED(max_num_tracks, num_track_slots);
LDIO_LOAD_DEPRECATED(sync, action_times);
Expand Down Expand Up @@ -178,6 +180,8 @@ void to_json(nlohmann::json& j, RunnerInput const& v)
LDIO_SAVE_OPTION(slot_diagnostic_prefix);
LDIO_SAVE(write_track_counts);
LDIO_SAVE(write_step_times);
LDIO_SAVE_OPTION(transporter_result);
LDIO_SAVE_OPTION(print_progress);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just use LDIO_SAVE for both options, since their values are meaningful even when not explicitly set.


LDIO_SAVE(seed);
LDIO_SAVE(num_track_slots);
Expand Down
34 changes: 31 additions & 3 deletions app/celer-sim/Transporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#include "corecel/grid/VectorUtils.hh"
#include "corecel/io/Logger.hh"
#include "corecel/io/ScopedTimeLog.hh"
#include "corecel/sys/TraceCounter.hh"
#include "corecel/sys/ScopedSignalHandler.hh"
#include "corecel/sys/TraceCounter.hh"
#include "celeritas/Types.hh"
#include "celeritas/global/ActionSequence.hh"
#include "celeritas/global/CoreParams.hh"
Expand All @@ -46,6 +46,7 @@ template<MemSpace M>
Transporter<M>::Transporter(TransporterInput inp)
: max_steps_(inp.max_steps)
, num_streams_(inp.params->max_streams())
, print_progress_(inp.print_progress)
, store_track_counts_(inp.store_track_counts)
, store_step_times_(inp.store_step_times)
{
Expand Down Expand Up @@ -127,8 +128,8 @@ auto Transporter<M>::operator()(SpanConstPrimary primaries) -> TransporterResult
#else
ScopedSignalHandler interrupted{SIGINT};
#endif
CELER_LOG_LOCAL(status)
<< "Transporting " << primaries.size() << " primaries";

this->progress(primaries.front().event_id, primaries.size());

StepTimer record_step_time{store_step_times_ ? &result.step_times
: nullptr};
Expand Down Expand Up @@ -203,6 +204,33 @@ void Transporter<M>::accum_action_times(MapStrDouble* result) const
}
}
}
//---------------------------------------------------------------------------//
/*!
 * Log a status message announcing the event that is about to be transported.
 *
 * If \c print_progress_ is zero, a message is logged on every call. Otherwise
 * a message is logged only when the event ID is an exact multiple of
 * \c print_progress_.
 *
 * \param id Event ID of the primaries being transported
 * \param num_primaries Number of primaries in the event; nonzero is a
 *   precondition
 */
template<MemSpace M>
void Transporter<M>::progress(EventId const id,
                              size_type const num_primaries) const
{
    CELER_EXPECT(num_primaries > 0);

    auto const id_val = id.unchecked_get();
    if (print_progress_ != 0 && id_val % print_progress_ != 0)
    {
        // Throttled: return before doing any message formatting
        return;
    }

    // Stream the pieces directly rather than assembling a temporary string
    CELER_LOG_LOCAL(status)
        << "Event " << id_val << ": transporting " << num_primaries
        << (num_primaries == 1 ? " primary" : " primaries");
}

//---------------------------------------------------------------------------//
// EXPLICIT INSTANTIATION
Expand Down
8 changes: 8 additions & 0 deletions app/celer-sim/Transporter.hh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ struct TransporterInput
size_type max_steps{};
bool store_track_counts{}; //!< Store track counts at each step
bool store_step_times{}; //!< Store time elapsed for each step
size_type print_progress{}; //!< Print progress every N events
sethrj marked this conversation as resolved.
Show resolved Hide resolved

StreamId stream_id{0};

Expand Down Expand Up @@ -135,11 +136,18 @@ class Transporter final : public TransporterBase
void accum_action_times(MapStrDouble*) const final;

private:
//// DATA ////
std::shared_ptr<Stepper<M>> stepper_;
size_type max_steps_;
size_type num_streams_;
size_type print_progress_;
bool store_track_counts_;
bool store_step_times_;

//// HELPER FUNCTIONS ////

// Print progress
void progress(EventId const id, size_type const num_primaries) const;
};

//---------------------------------------------------------------------------//
Expand Down
22 changes: 16 additions & 6 deletions app/celer-sim/celer-sim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ void run(std::istream* is, std::shared_ptr<OutputRegistry> output)
Runner run_stream(*run_input, output);
SimulationResult result;
result.setup_time = get_setup_time();
result.events.resize(run_stream.num_events());
if (run_input->transporter_result)
{
result.events.resize(run_stream.num_events());
}

// Allocate device streams, or use the default stream if there is only one.
size_type num_streams = run_stream.num_streams();
Expand All @@ -113,7 +116,7 @@ void run(std::istream* is, std::shared_ptr<OutputRegistry> output)

// Start profiling *after* initialization and warmup are complete
Stopwatch get_transport_time;
if (run_input->merge_events)
if (run_input->transporter_result && run_input->merge_events)
{
// Run all events simultaneously on a single stream
result.events.front() = run_stream();
amandalund marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -131,16 +134,23 @@ void run(std::istream* is, std::shared_ptr<OutputRegistry> output)
activate_device_local();

// Run a single event on a single thread
CELER_TRY_HANDLE(result.events[event] = run_stream(
StreamId(get_openmp_thread()), EventId(event)),
capture_exception);
CELER_TRY_HANDLE(
(run_input->transporter_result)
? result.events[event] = run_stream(
StreamId(get_openmp_thread()), EventId(event))
: run_stream(StreamId(get_openmp_thread()), EventId(event)),
capture_exception);
sethrj marked this conversation as resolved.
Show resolved Hide resolved
}
log_and_rethrow(std::move(capture_exception));
}

result.action_times = run_stream.get_action_times();
result.total_time = get_transport_time();
record_mem = {};
output->insert(std::make_shared<RunnerOutput>(std::move(result)));
if (run_input->transporter_result)
{
output->insert(std::make_shared<RunnerOutput>(std::move(result)));
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should still write the simulation result (timing info, number of CPU threads) either way.

Suggested change
if (run_input->transporter_result)
{
output->insert(std::make_shared<RunnerOutput>(std::move(result)));
}
output->insert(std::make_shared<RunnerOutput>(std::move(result)));

}

//---------------------------------------------------------------------------//
Expand Down
Loading