Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: allow add_srcdata to place sources on non-owned points #1959

Merged
merged 5 commits into from
Apr 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/adjoint/objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,12 @@ def place_adjoint_source(self, dJ):
scale = amp_arr * self._adj_src_scale(include_resolution=False)

if self.num_freq == 1:
sources += [mp.IndexedSource(time_src, fourier_data, scale[:,0])]
sources += [mp.IndexedSource(time_src, fourier_data, scale[:,0], not self.yee_grid)]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this only affects code that use Fourier fields yee_grid=false.

else:
src = FilteredSource(time_src.frequency,self._frequencies,scale,self.sim.fields.dt)
(num_basis, num_pts) = src.nodes.shape
for basis_i in range(num_basis):
sources += [mp.IndexedSource(src.time_src_bf[basis_i], fourier_data, src.nodes[basis_i])]
sources += [mp.IndexedSource(src.time_src_bf[basis_i], fourier_data, src.nodes[basis_i], not self.yee_grid)]
return sources

def __call__(self):
Expand Down
3 changes: 2 additions & 1 deletion python/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2454,7 +2454,8 @@ def add_source(self, src):
self.init_sim()

if isinstance(src, IndexedSource):
self.fields.add_srcdata(src.srcdata, src.src.swigobj, src.num_pts, src.amp_arr)
self.fields.register_src_time(src.src.swigobj)
self.fields.add_srcdata(src.srcdata, src.src.swigobj, src.num_pts, src.amp_arr, src.needs_boundary_fix)
return

where = Volume(src.center, src.size, dims=self.dimensions,
Expand Down
3 changes: 2 additions & 1 deletion python/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,8 +597,9 @@ class IndexedSource(Source):
"""
created a source object using (SWIG-wrapped mp::srcdata*) srcdata.
"""
def __init__(self, src, srcdata, amp_arr):
def __init__(self, src, srcdata, amp_arr, needs_boundary_fix=False):
self.src = src
self.num_pts = len(amp_arr)
self.srcdata = srcdata
self.amp_arr = amp_arr
self.needs_boundary_fix = needs_boundary_fix
2 changes: 1 addition & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ bicgstab.hpp meepgeom.hpp material_data.hpp adjust_verbosity.hpp
libmeep_la_SOURCES = array_slice.cpp anisotropic_averaging.cpp \
bands.cpp boundaries.cpp bicgstab.cpp casimir.cpp \
cw_fields.cpp dft.cpp dft_ldos.cpp energy_and_flux.cpp \
fields.cpp fields_dump.cpp loop_in_chunks.cpp h5fields.cpp h5file.cpp \
fields.cpp fields_dump.cpp fix_boundary_sources.cpp loop_in_chunks.cpp h5fields.cpp h5file.cpp \
initialize.cpp integrate.cpp integrate2.cpp material_data.cpp monitor.cpp mympi.cpp \
multilevel-atom.cpp near2far.cpp output_directory.cpp random.cpp \
sources.cpp step.cpp step_db.cpp stress.cpp structure.cpp structure_dump.cpp \
Expand Down
2 changes: 2 additions & 0 deletions src/fields.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,8 @@ bool fields_chunk::alloc_f(component c) {
// allocate fields for components required by any source on any process
// ... this is needed after calling the low-level fields::add_srcdata
void fields::require_source_components() {
fix_boundary_sources(); // needed if add_srcdata put sources on non-owned points

int needed[NUM_FIELD_COMPONENTS];
memset(needed, 0, sizeof(needed));
for (int i = 0; i < num_chunks; i++) {
Expand Down
177 changes: 177 additions & 0 deletions src/fix_boundary_sources.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#include "meep_internals.hpp"
#include "config.h"

#ifdef HAVE_MPI
#ifdef NEED_UNDEF_SEEK_FOR_MPI
// undef'ing SEEK_* is needed for MPICH, possibly other MPI versions
#undef SEEK_SET
#undef SEEK_END
#undef SEEK_CUR
#endif
#include <mpi.h>
#endif

#include <algorithm>

namespace meep {

#ifdef HAVE_MPI
static MPI_Comm mycomm = MPI_COMM_WORLD;
#endif

// data structure for sending source information from one chunk to another
struct srcpt_info {
std::complex<double> A; // amplitude
ptrdiff_t index; // index in chunk's fields array
size_t src_time_id;
int chunk_idx;
int c; // component
};

// comparison function for sorting srcpt_info lexicographically
// by (processor, src_time_id, chunk_idx, c)
struct srcpt_info_compare {
fields_chunk **chunks;
bool operator() (srcpt_info a, srcpt_info b) {
int aproc = chunks[a.chunk_idx]->n_proc();
int bproc = chunks[b.chunk_idx]->n_proc();
return (aproc != bproc ? aproc < bproc :
(a.src_time_id != b.src_time_id ? a.src_time_id < b.src_time_id :
(a.chunk_idx != b.chunk_idx ? a.chunk_idx < b.chunk_idx :
a.c < b.c)));
}
};

void fields::fix_boundary_sources() {
am_now_working_on(Connecting);

std::vector<srcpt_info> boundarysources;

// find all not-owned source points and figure out in which chunk
// they are actually supposed to be located, storing info in boundarysources.
for (int i = 0; i < num_chunks; i++) {
FOR_FIELD_TYPES(ft) {
for (src_vol &src : chunks[i]->sources[ft])
if (src.needs_boundary_fix) {
for (size_t ipt = 0; ipt < src.num_points(); ++ipt) {
component c = src.c;
ivec here = chunks[i]->gv.iloc(c, src.index_at(ipt));
if (!chunks[i]->gv.owns(here) && src.amplitude(ipt) != 0.0) {
if (src.t()->id == 0) abort("bug: fix_boundary_sources called for non-registered source");

// find the chunk that owns this point, similar to logic in boundaries.cpp
std::complex<double> thephase;
if (locate_component_point(&c, &here, &thephase) && !on_metal_boundary(here)) {
for (int j = 0; j < num_chunks; j++)
if (chunks[j]->gv.owns(here)) {
srcpt_info s = { src.amplitude(ipt)*conj(thephase), chunks[j]->gv.index(c, here), src.t()->id, chunks[j]->chunk_idx, c };
boundarysources.push_back(s);
break;
}
}
src.set_amplitude(ipt, 0.0); // will no longer be needed
}
}
src.needs_boundary_fix = false;
}
}
}

// we need each process's data to be contiguous
srcpt_info_compare compare = {chunks};
std::sort(boundarysources.begin(), boundarysources.end(), compare);

// collect 2d (row-major) arrays offsets and numcomm,
// where numcomm[i,j] is the number of srcpt_info items
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: numcomm[i,j] here is stored at numcomm[i*P + j] (this is "row major order").

// to be set from process i to process j, and offsets[i,j]
// is the corresponding offset in the boundarysources input.
int p = my_rank();
int P = count_processors();
std::vector<size_t> offsets(P * P, size_t(0));
std::vector<size_t> numcomm_(P * P, size_t(0));
size_t idx0 = 0;
int p0 = 0;
for (size_t idx = 0; idx < boundarysources.size(); ++idx) {
int pidx = chunks[boundarysources[idx].chunk_idx]->n_proc();
if (pidx != p0) {
offsets[p*P + p0] = idx0;
numcomm_[p*P + p0] = idx - idx0;
p0 = pidx;
idx0 = idx;
}
}
offsets[p*P + p0] = idx0;
numcomm_[p*P + p0] = boundarysources.size() - idx0;

// collect the numcomm data from all processes
std::vector<size_t> numcomm(P * P, size_t(0));
sum_to_all(numcomm_.data(), numcomm.data(), P*P);

#ifdef HAVE_MPI
// declare an MPI datatype mirroring srcpt_info, so that we can send/receive srcpt_info arrays
int srcpt_info_blocklengths[5] = {2,1,1,1,1};
MPI_Datatype srcpt_info_types[5] = {MPI_DOUBLE, sizeof(ptrdiff_t) == sizeof(int) ? MPI_INT : MPI_LONG_LONG, sizeof(size_t) == sizeof(unsigned) ? MPI_UNSIGNED : MPI_UNSIGNED_LONG_LONG, MPI_INT, MPI_INT};
MPI_Aint srcpt_info_offsets[5] = { offsetof(srcpt_info,A), offsetof(srcpt_info,index), offsetof(srcpt_info,src_time_id), offsetof(srcpt_info,chunk_idx), offsetof(srcpt_info,c) };
MPI_Datatype mpi_srcpt_info;
MPI_Type_create_struct(5, srcpt_info_blocklengths, srcpt_info_offsets, srcpt_info_types, &mpi_srcpt_info);
MPI_Type_commit(&mpi_srcpt_info);
#endif

for (int psrc = 0; psrc < P; ++psrc)
for (int pdest = 0; pdest < P; ++pdest) {
size_t N = numcomm[psrc*P + pdest];
if (N == 0) continue;
if (pdest == p) {
srcpt_info *srcpts;
#ifdef HAVE_MPI
if (psrc != p) {
srcpts = new srcpt_info[N];
MPI_Status status;
MPI_Recv(srcpts, N, mpi_srcpt_info, psrc, psrc*P + pdest, mycomm, &status);
}
else
#endif
srcpts = boundarysources.data() + offsets[psrc*P + pdest];
int chunk_idx = srcpts[0].chunk_idx;
size_t src_time_id = srcpts[0].src_time_id;
int c = srcpts[0].c;
size_t idx0 = 0;
for (size_t idx = 0; idx <= N; ++idx) {
if (idx == N || srcpts[idx].chunk_idx != chunk_idx || srcpts[idx].src_time_id != src_time_id || srcpts[idx].c != c) {
std::vector<ptrdiff_t> idx_arr(idx - idx0);
std::vector<std::complex<double> > amp_arr(idx - idx0);
for (size_t i = idx0; i < idx; ++i) {
idx_arr[i-idx0] = srcpts[i].index;
amp_arr[i-idx0] = srcpts[i].A;
}
sourcedata srcdata = { (component) c, idx_arr, chunk_idx, amp_arr };
src_time *srctime = lookup_src_time(src_time_id);
if (srctime == NULL) abort("bug: unknown src_time_id (missing registration?)");
add_srcdata(srcdata, srctime, size_t(0), NULL, false);
if (idx < N) {
chunk_idx = srcpts[idx].chunk_idx;
src_time_id = srcpts[idx].src_time_id;
c = srcpts[idx].c;
idx0 = idx;
}
}
}

if (psrc != p) delete[] srcpts;
}
#ifdef HAVE_MPI
else if (psrc == p) {
srcpt_info *srcpts = boundarysources.data() + offsets[psrc*P + pdest];
MPI_Send(srcpts, N, mpi_srcpt_info, pdest, psrc*P + pdest, mycomm);
}
#endif
}

#ifdef HAVE_MPI
MPI_Type_free(&mpi_srcpt_info);
#endif

finished_working();
}

} // namespace meep
12 changes: 11 additions & 1 deletion src/meep.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -939,8 +939,13 @@ class src_time {
// sources are not, but this may change.
bool is_integrated;

// a unique ID > 0 can be assigned to a src_time object by fields::register_src_time,
// in order to communicate it from one process to another; otherwise defaults to 0.
size_t id;

src_time() {
is_integrated = true;
id = 0;
current_time = nan;
current_current = 0.0;
next = NULL;
Expand All @@ -951,6 +956,7 @@ class src_time {
current_time = t.current_time;
current_current = t.current_current;
current_dipole = t.current_dipole;
id = t.id;
if (t.next)
next = t.next->clone();
else
Expand Down Expand Up @@ -1892,7 +1898,9 @@ class fields {
void require_source_components();
void _require_component(component c, bool aniso2d);
void require_component(component c) { _require_component(c, is_aniso2d()); sync_chunk_connections(); }
void add_srcdata(struct sourcedata cur_data, src_time *src, size_t n, std::complex<double>* amp_arr);
void add_srcdata(struct sourcedata cur_data, src_time *src, size_t n, std::complex<double>* amp_arr, bool needs_boundary_fix);
void register_src_time(src_time *src);
src_time *lookup_src_time(size_t id);

// mpb.cpp

Expand Down Expand Up @@ -2233,6 +2241,8 @@ class fields {
bool locate_point_in_user_volume(ivec *, std::complex<double> *phase) const;
void locate_volume_source_in_user_volume(const vec p1, const vec p2, vec newp1[8], vec newp2[8],
std::complex<double> kphase[8], int &ncopies) const;
// fix_boundary_sources.cpp
void fix_boundary_sources();
// step.cpp
void phase_material();
void step_db(field_type ft);
Expand Down
6 changes: 5 additions & 1 deletion src/meep_internals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,16 @@ class src_vol {
// Constructs a new source volume. Takes ownership of `ind` and `amps`.
// Requirement: ind.size() == amps.size()
src_vol(component cc, src_time *st, std::vector<ptrdiff_t> &&ind,
std::vector<std::complex<double> > &&amps);
std::vector<std::complex<double> > &&amps, bool fix_boundaries=false);

// Checks whether `a` and `b` are combinable, i.e. have the same indices and point to the same
// `src_time` instance, but have potentially different amplitudes.
static bool combinable(const src_vol &a, const src_vol &b);

ptrdiff_t index_at(size_t pos) const { return index[pos]; }
std::complex<double> amplitude(size_t j) const { return amp[j]; };
void set_amplitude(size_t j, std::complex<double> a) { amp[j] = a; };
void set_amplitude(size_t j, double a) { amp[j] = a; };
const std::complex<double> &amplitude_at(size_t pos) const { return amp[pos]; }
size_t num_points() const { return index.size(); };
const src_time *t() const { return src_t; };
Expand All @@ -75,6 +78,7 @@ class src_vol {
void add_amplitudes_from(const src_vol &other);

const component c; // field component the source applies to
bool needs_boundary_fix; // whether fix_boundary_sources needs calling
private:
src_time *src_t; // Not owned by us.
std::vector<ptrdiff_t> index; // locations of sources in grid (indices)
Expand Down
33 changes: 28 additions & 5 deletions src/sources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <stdlib.h>
#include <math.h>
#include <complex>
#include <assert.h>

#include "meep.hpp"
#include "meep_internals.hpp"
Expand Down Expand Up @@ -165,13 +166,13 @@ bool custom_src_time::is_equal(const src_time &t) const {
/*********************************************************************/

src_vol::src_vol(component cc, src_time *st, std::vector<ptrdiff_t> &&ind,
std::vector<std::complex<double> > &&amps)
std::vector<std::complex<double> > &&amps, bool fix_boundaries)
: c([](component c) -> component {
if (is_D(c)) c = direction_component(Ex, component_direction(c));
if (is_B(c)) c = direction_component(Hx, component_direction(c));
return c;
}(cc)),
src_t(st), index(std::move(ind)), amp(std::move(amps)) {
src_t(st), index(std::move(ind)), amp(std::move(amps)), needs_boundary_fix(fix_boundaries) {
assert(index.size() == amp.size());
}

Expand Down Expand Up @@ -288,18 +289,23 @@ static void src_vol_chunkloop(fields_chunk *fc, int ichunk, component c, ivec is
fc->add_source(ft, src_vol(c, data->src, std::move(index_array), std::move(amps_array)));
}

void fields::add_srcdata(struct sourcedata cur_data, src_time *src, size_t n, std::complex<double>* amp_arr){
void fields::add_srcdata(struct sourcedata cur_data, src_time *src, size_t n, std::complex<double>* amp_arr, bool needs_boundary_fix){
if (n == 0) {
n = cur_data.idx_arr.size();
assert(amp_arr == NULL);
amp_arr = cur_data.amp_arr.data();
}
assert(n == cur_data.idx_arr.size());
sources = src->add_to(sources, &src);
std::vector<ptrdiff_t> index_arr(cur_data.idx_arr);
std::vector<std::complex<double>> amplitudes(amp_arr, amp_arr+n);
component c = cur_data.near_fd_comp;

field_type ft = is_magnetic(c) ? B_stuff : D_stuff;
if (0 > cur_data.fc_idx or cur_data.fc_idx >= num_chunks) meep::abort("fields chunk index out of range");
fields_chunk *fc = chunks[cur_data.fc_idx];
if (!fc->is_mine()) meep::abort("wrong fields chunk");

fc->add_source(ft, src_vol(c, src, std::move(index_arr), std::move(amplitudes)));
fc->add_source(ft, src_vol(c, src, std::move(index_arr), std::move(amplitudes), needs_boundary_fix));
// We can't do require_component(c) since that only works if all processes are adding
// srcdata for the same components in the same order, which may not be true.
// ... instead, the caller should call fields::require_source_components()
Expand Down Expand Up @@ -343,6 +349,23 @@ complex<double> amp_file_func(const vec &p) {
return res;
}

void fields::register_src_time(src_time *src) {
sources = src->add_to(sources, &src);
if (src->id == 0) { // doesn't have an ID yet
size_t max_id = 0;
for (src_time *s = sources; s; s = s->next)
max_id = s->id > max_id ? s->id : max_id;
src->id = max_id + 1;
}
}

src_time *fields::lookup_src_time(size_t id) {
if (id == 0) abort("bug: cannot lookup unregistered source");
for (src_time *s = sources; s; s = s->next)
if (s->id == id) return s;
return NULL;
}

void fields::add_volume_source(component c, const src_time &src, const volume &where_,
complex<double> *arr, size_t dim1, size_t dim2, size_t dim3,
complex<double> amp) {
Expand Down