Skip to content

Commit

Permalink
Fine-grained timing data for the time stepping phase (#1682)
Browse files Browse the repository at this point in the history
  • Loading branch information
ahoenselaar authored Jul 20, 2021
1 parent 2b22ffe commit 2bb929d
Show file tree
Hide file tree
Showing 8 changed files with 349 additions and 117 deletions.
27 changes: 27 additions & 0 deletions python/meep.i
Original file line number Diff line number Diff line change
Expand Up @@ -1433,6 +1433,27 @@ void _get_gradient(PyObject *grad, PyObject *fields_a, PyObject *fields_f, PyObj
$1 = NULL;
}


// typemaps for timing data

%typemap(out) std::unordered_map<meep::time_sink, std::vector<double>, std::hash<int> > {
PyObject *out_dict = PyDict_New();
for (const auto& ts_vec : $1) {
const std::vector<double>& timing_vector = ts_vec.second;
PyObject *res = PyList_New(timing_vector.size());
for (size_t i = 0; i < timing_vector.size(); ++i) {
PyList_SetItem(res, i, PyFloat_FromDouble(timing_vector[i]));
}
PyObject *key = PyInteger_FromLong(static_cast<int>(ts_vec.first));
PyDict_SetItem(out_dict, key, res);

Py_DECREF(key);
Py_DECREF(res);
}
$result = out_dict;
}


// Tells Python to take ownership of the h5file* this function returns so that
// it gets garbage collected and the file gets closed.
%newobject meep::fields::open_h5file;
Expand Down Expand Up @@ -1478,8 +1499,14 @@ void _get_gradient(PyObject *grad, PyObject *fields_a, PyObject *fields_f, PyObj
%ignore meep::comms_operation;
%ignore meep::comms_sequence;
%ignore meep::create_comms_manager;
%ignore meep::fields::get_time_spent_on;
%ignore meep::fields::times_spent;
%ignore meep::fields::was_working_on;
%ignore meep::fields::with_timing_scope;
%ignore meep::fields::working_on;
%ignore meep::fields_chunk;
%ignore meep::infinity;
%ignore meep::timing_scope;

%ignore std::vector<meep::volume>::vector(size_type);
%ignore std::vector<meep::volume>::resize;
Expand Down
36 changes: 32 additions & 4 deletions python/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3690,10 +3690,29 @@ def print_times(self):
def mean_time_spent_on(self, time_sink):
"""
Return the mean time spent by all processes for a type of work `time_sink` which
can be one of ten integer values `0`-`9`: (`0`) connecting chunks, (`1`) time stepping,
(`2`) copying boundaries, (`3`) MPI all-to-all communication/synchronization,
(`4`) MPI one-to-one communication, (`5`) field output, (`6`) Fourier transforming,
(`7`) MPB mode solver, (`8`) near-to-far field transformation, and (`9`) other.
can be one of the following integer constants:
* meep.Stepping ("time stepping")
* meep.Connecting ("connecting chunks")
* meep.Boundaries ("copying boundaries")
* meep.MpiAllTime ("all-all communication")
* meep.MpiOneTime ("1-1 communication")
* meep.FieldOutput ("outputting fields")
* meep.FourierTransforming ("Fourier transforming")
* meep.MPBTime ("MPB mode solver")
* meep.GetFarfieldsTime ("far-field transform")
* meep.FieldUpdateB ("updating B field")
* meep.FieldUpdateH ("updating H field")
* meep.FieldUpdateD ("updating D field")
* meep.FieldUpdateE ("updating E field")
* meep.BoundarySteppingB ("boundary stepping B")
* meep.BoundarySteppingWH ("boundary stepping WH")
* meep.BoundarySteppingPH ("boundary stepping PH")
* meep.BoundarySteppingH ("boundary stepping H")
* meep.BoundarySteppingD ("boundary stepping D")
* meep.BoundarySteppingWE ("boundary stepping WE")
* meep.BoundarySteppingPE ("boundary stepping PE")
* meep.BoundarySteppingE ("boundary stepping E")
* meep.Other ("everything else")
"""
return self.fields.mean_time_spent_on(time_sink)

Expand All @@ -3707,6 +3726,15 @@ def time_spent_on(self, time_sink):
"""
return self.fields.time_spent_on(time_sink)

def get_timing_data(self):
"""
Returns a dictionary that maps each `time_sink` to a list with one entry
per process. The entries in the list correspond to the total amount of
time in seconds spent on a particular type of operation. The set of
valid time sinks is the same as for `mean_time_spent_on`.
"""
return self.fields.get_timing_data()

def output_times(self, fname):
"""
Call after running a simulation to output to a file with filename `fname` the
Expand Down
42 changes: 42 additions & 0 deletions python/tests/test_simulation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import itertools
import os
import re
import sys
Expand Down Expand Up @@ -579,6 +580,47 @@ def print_field(sim):

self.assertAlmostEqual(result[0], -0.0599602798684155)

def test_timing_data(self):
resolution = 20
cell_size = mp.Vector3(10, 10)
pml = [mp.PML(1)]
center = mp.Vector3(2, -1)
result = []
fcen = 0.15
df = 0.1

sources = [mp.Source(src=mp.GaussianSource(fcen, fwidth=df), component=mp.Ez,
center=mp.Vector3())]
geometry = [mp.Block(center=mp.Vector3(), size=mp.Vector3(mp.inf, 3, mp.inf),
material=mp.Medium(epsilon=12))]

sim = mp.Simulation(resolution=resolution, cell_size=cell_size, boundary_layers=pml,
sources=sources, geometry=geometry, geometry_center=center)
sim.run(until=50)
timing_data = sim.get_timing_data()

# Non-exhaustive collection of steps where some time should be spent:
EXPECTED_NONZERO_TIMESINKS = (mp.Stepping, mp.Boundaries,
mp.FieldUpdateB, mp.FieldUpdateH,
mp.FieldUpdateD, mp.FieldUpdateE)
# Due to the problem setup, no time should be spent on these steps:
EXPECTED_ZERO_TIMESINKS = (mp.MPBTime, mp.GetFarfieldsTime)

for sink in itertools.chain(EXPECTED_NONZERO_TIMESINKS,
EXPECTED_ZERO_TIMESINKS):
self.assertIn(sink, timing_data.keys())
self.assertEqual(len(timing_data[sink]), mp.count_processors())
np.testing.assert_array_equal(sim.time_spent_on(sink),
timing_data[sink])

for sink in EXPECTED_NONZERO_TIMESINKS:
for t in timing_data[sink]:
self.assertGreater(t, 0)

for sink in EXPECTED_ZERO_TIMESINKS:
for t in timing_data[sink]:
self.assertEqual(t, 0)

def test_source_slice(self):
sim = self.init_simple_simulation()
sim.run(until=1)
Expand Down
2 changes: 1 addition & 1 deletion scheme/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ endif

# workaround missing namespace prefix in swig
meep_renames.i: $(LIBHDRS)
(echo "// AUTOMATICALLY GENERATED -- DO NOT EDIT"; perl -pe 's/^ *class +([A-Za-z_0-9:]*)( *| *:[^{]*){.*$$/%rename(meep_\1) meep::\1;/' $(LIBHDRS) | grep "%rename" | sed '/meep_fields_chunk/d ; /meep_comms_manager/d ; /meep_comms_key_hash_fn/d' | sort -u; echo; grep -hv typedef $(LIBHDRS) | perl -pe 's/(inline|const|extern|static) +//g' | perl -pe 's/^[A-Za-z_0-9:<>]+[* ]+([A-Za-z_0-9:]*) *\(.*$$/%rename(meep_\1) meep::\1;/' | grep "%rename" | sed '/choose_chunkdivision/d ; /meep_create_comms_manager/d' | sort -u; ) > $@
(echo "// AUTOMATICALLY GENERATED -- DO NOT EDIT"; perl -pe 's/^ *class +([A-Za-z_0-9:]*)( *| *:[^{]*){.*$$/%rename(meep_\1) meep::\1;/' $(LIBHDRS) | grep "%rename" | sed '/meep_fields_chunk/d ; /meep_comms_manager/d ; /meep_comms_key_hash_fn/d ; /meep_timing_scope/d' | sort -u; echo; grep -hv typedef $(LIBHDRS) | perl -pe 's/(inline|const|extern|static) +//g' | perl -pe 's/^[A-Za-z_0-9:<>]+[* ]+([A-Za-z_0-9:]*) *\(.*$$/%rename(meep_\1) meep::\1;/' | grep "%rename" | sed '/choose_chunkdivision/d ; /meep_create_comms_manager/d' | sort -u; ) > $@

# work around bug in swig, where it doesn't prepend namespace to friend funcs
meep_swig_bug_workaround.i: $(LIBHDRS)
Expand Down
5 changes: 3 additions & 2 deletions src/fields.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ using namespace std;
namespace meep {

fields::fields(structure *s, double m, double beta, bool zero_fields_near_cylorigin)
: S(s->S), gv(s->gv), user_volume(s->user_volume), v(s->v), m(m), beta(beta) {
: S(s->S), gv(s->gv), user_volume(s->user_volume), v(s->v), m(m), beta(beta),
working_on(&times_spent) {
shared_chunks = s->shared_chunks;
components_allocated = false;
synchronized_magnetic_fields = 0;
Expand Down Expand Up @@ -82,7 +83,7 @@ fields::fields(structure *s, double m, double beta, bool zero_fields_near_cylori
}

fields::fields(const fields &thef)
: S(thef.S), gv(thef.gv), user_volume(thef.user_volume), v(thef.v) {
: S(thef.S), gv(thef.gv), user_volume(thef.user_volume), v(thef.v), working_on(&times_spent) {
shared_chunks = thef.shared_chunks;
components_allocated = thef.components_allocated;
synchronized_magnetic_fields = thef.synchronized_magnetic_fields;
Expand Down
56 changes: 47 additions & 9 deletions src/meep.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <limits>
#include <memory>
#include <unordered_map>
#include <vector>

#include <stdio.h>
#include <stddef.h>
#include <math.h>
Expand All @@ -29,8 +31,6 @@
#include "meep/mympi.hpp"
#include "meep/meep-config.h"

#include <vector>

namespace meep {

/* The (time-domain) fields arrays of the fields_chunk class as well
Expand Down Expand Up @@ -1561,7 +1561,38 @@ enum time_sink {
FourierTransforming,
MPBTime,
GetFarfieldsTime,
Other
Other,
FieldUpdateB,
FieldUpdateH,
FieldUpdateD,
FieldUpdateE,
BoundarySteppingB,
BoundarySteppingWH,
BoundarySteppingPH,
BoundarySteppingH,
BoundarySteppingD,
BoundarySteppingWE,
BoundarySteppingPE,
BoundarySteppingE
};
using time_sink_to_duration_map = std::unordered_map<time_sink, double, std::hash<int>>;

// RAII-based profiling timer that accumulates wall time from creation until it
// is destroyed or the `exit` method is invoked. Not thread-safe.
class timing_scope {
public:
// Creates a `timing_scope` that persists timing information in `timers_` but does not take
// ownership of it.
explicit timing_scope(time_sink_to_duration_map *timers_, time_sink sink_ = Other);
~timing_scope();
// Stops time accumulation for the timing_scope.
void exit();

private:
time_sink_to_duration_map *timers; // Not owned by us.
time_sink sink;
bool active;
double t_start;
};

typedef void (*field_chunkloop)(fields_chunk *fc, int ichunk, component cgrid, ivec is, ivec ie,
Expand Down Expand Up @@ -1674,7 +1705,7 @@ class fields {
void reset();

// time.cpp
std::vector<double> time_spent_on(time_sink);
std::vector<double> time_spent_on(time_sink sink);
double mean_time_spent_on(time_sink);
void print_times();
// boundaries.cpp
Expand Down Expand Up @@ -2108,9 +2139,9 @@ class fields {
private:
int synchronized_magnetic_fields; // count number of nested synchs
double last_wall_time;
#define MEEP_TIMING_STACK_SZ 10
time_sink working_on, was_working_on[MEEP_TIMING_STACK_SZ];
double times_spent[Other + 1];
std::vector<time_sink> was_working_on;
time_sink_to_duration_map times_spent;
timing_scope working_on;
// fields.cpp
void figure_out_step_plan();
// boundaries.cpp
Expand Down Expand Up @@ -2145,11 +2176,18 @@ class fields {
// boundaries.cpp
bool locate_component_point(component *, ivec *, std::complex<double> *) const;
// time.cpp
void am_now_working_on(time_sink);
void am_now_working_on(time_sink sink);
void finished_working();

double get_time_spent_on(time_sink sink) const;
// Returns a map from time_sink to the vector of total times each MPI process spent on the
// indicated category.
std::unordered_map<time_sink, std::vector<double>, std::hash<int> > get_timing_data() const;

private:
void reset_timers();
timing_scope with_timing_scope(time_sink sink);

private:
// The following is an array that is num_chunks by num_chunks. Actually
// it is two arrays, one for the imaginary and one for the real part.
realnum **comm_blocks[NUM_FIELD_TYPES];
Expand Down
60 changes: 48 additions & 12 deletions src/step.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,28 +62,64 @@ void fields::step() {
chunks[i]->s->update_condinv();

calc_sources(time()); // for B sources
step_db(B_stuff);
{
auto step_timer = with_timing_scope(FieldUpdateB);
step_db(B_stuff);
}
step_source(B_stuff);
step_boundaries(B_stuff);
{
auto step_timer = with_timing_scope(BoundarySteppingB);
step_boundaries(B_stuff);
}
calc_sources(time() + 0.5 * dt); // for integrated H sources
update_eh(H_stuff);
step_boundaries(WH_stuff);
{
auto step_timer = with_timing_scope(FieldUpdateH);
update_eh(H_stuff);
}
{
auto step_timer = with_timing_scope(BoundarySteppingWH);
step_boundaries(WH_stuff);
}
update_pols(H_stuff);
step_boundaries(PH_stuff);
step_boundaries(H_stuff);
{
auto step_timer = with_timing_scope(BoundarySteppingPH);
step_boundaries(PH_stuff);
}
{
auto step_timer = with_timing_scope(BoundarySteppingH);
step_boundaries(H_stuff);
}

if (fluxes) fluxes->update_half();

calc_sources(time() + 0.5 * dt); // for D sources
step_db(D_stuff);
{
auto step_timer = with_timing_scope(FieldUpdateD);
step_db(D_stuff);
}
step_source(D_stuff);
step_boundaries(D_stuff);
{
auto step_timer = with_timing_scope(BoundarySteppingD);
step_boundaries(D_stuff);
}
calc_sources(time() + dt); // for integrated E sources
update_eh(E_stuff);
step_boundaries(WE_stuff);
{
auto step_timer = with_timing_scope(FieldUpdateE);
update_eh(E_stuff);
}
{
auto step_timer = with_timing_scope(BoundarySteppingWE);
step_boundaries(WE_stuff);
}
update_pols(E_stuff);
step_boundaries(PE_stuff);
step_boundaries(E_stuff);
{
auto step_timer = with_timing_scope(BoundarySteppingPE);
step_boundaries(PE_stuff);
}
{
auto step_timer = with_timing_scope(BoundarySteppingE);
step_boundaries(E_stuff);
}

if (fluxes) fluxes->update();
t += 1;
Expand Down
Loading

0 comments on commit 2bb929d

Please sign in to comment.