diff --git a/src/boundaries.cpp b/src/boundaries.cpp index e0dc87287..838b06375 100644 --- a/src/boundaries.cpp +++ b/src/boundaries.cpp @@ -144,22 +144,9 @@ vec fields::lattice_vector(direction d) const { return gv[ilattice_vector(d)]; } void fields::disconnect_chunks() { chunk_connections_valid = false; for (int i = 0; i < num_chunks; i++) { - DOCMP { - FOR_FIELD_TYPES(f) { - for (int ip = 0; ip < 3; ++ip) - for (int io = 0; io < 2; io++) { - delete[] chunks[i]->connections[f][ip][io]; - chunks[i]->connections[f][ip][io] = NULL; - } - } - } - FOR_FIELD_TYPES(f) { - delete[] chunks[i]->connection_phases[f]; - chunks[i]->connection_phases[f] = NULL; - for (int ip = 0; ip < 3; ++ip) - for (int io = 0; io < 2; io++) - chunks[i]->num_connections[f][ip][io] = 0; - } + chunks[i]->connections_in.clear(); + chunks[i]->connections_out.clear(); + chunks[i]->connection_phases.clear(); } FOR_FIELD_TYPES(ft) { for (int i = 0; i < num_chunks * num_chunks; i++) { @@ -358,16 +345,6 @@ bool fields_chunk::needs_W_notowned(component c) { } void fields::connect_the_chunks() { - size_t *nc[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][2]; - FOR_FIELD_TYPES(f) { - for (connect_phase ip : all_connect_phases) - for (int io = 0; io < 2; io++) { - nc[f][ip][io] = new size_t[num_chunks]; - for (int i = 0; i < num_chunks; i++) - nc[f][ip][io][i] = 0; - } - } - /* For some of the chunks, H==B, and we definitely don't need to send B between two such chunks. We'll still send B when the recipient has H != B, since the recipient needs to get B @@ -377,13 +354,13 @@ void fields::connect_the_chunks() { a bit subtle since the non-owned B may be different from H even on an H==B chunk (true?), but since we don't use the non-owned B for anything(?) it shouldn't matter. */ - int *B_redundant = new int[num_chunks * 2 * 5]; + std::vector B_redundant(num_chunks * 2 * 5); for (int i = 0; i < num_chunks; ++i) FOR_H_AND_B(hc, bc) { B_redundant[5 * (num_chunks + i) + bc - Bx] = chunks[i]->f[hc][0] == chunks[i]->f[bc][0]; } am_now_working_on(MpiAllTime); - and_to_all(B_redundant + 5 * num_chunks, B_redundant, 5 * num_chunks); + and_to_all(B_redundant.data() + 5 * num_chunks, B_redundant.data(), 5 * num_chunks); finished_working(); /* Figure out whether we need the notowned W field (== E/H in @@ -406,6 +383,7 @@ void fields::connect_the_chunks() { finished_working(); comm_sizes.clear(); + const size_t num_reals_per_voxel = is_real ? 1 : 2; for (int i = 0; i < num_chunks; i++) { // First count the border elements... const grid_volume vi = chunks[i]->gv; @@ -424,19 +402,11 @@ void fields::connect_the_chunks() { const connect_phase ip = thephase == 1.0 ? CONNECT_COPY : (thephase == -1.0 ? CONNECT_NEGATE : CONNECT_PHASE); - { - field_type f = type(c); - const int nn = is_real ? 1 : 2; - nc[f][ip][Incoming][i] += nn; - nc[f][ip][Outgoing][j] += nn; - comm_sizes[{f, ip, pair_j_to_i}] += nn; - } + comm_sizes[{type(c), ip, pair_j_to_i}] += num_reals_per_voxel; + if (needs_W_notowned[corig]) { field_type f = is_electric(corig) ? WE_stuff : WH_stuff; - const int nn = is_real ? 1 : 2; - nc[f][ip][Incoming][i] += nn; - nc[f][ip][Outgoing][j] += nn; - comm_sizes[{f, ip, pair_j_to_i}] += nn; + comm_sizes[{f, ip, pair_j_to_i}] += num_reals_per_voxel; } if (is_electric(corig) || is_magnetic(corig)) { field_type f = is_electric(corig) ? PE_stuff : PH_stuff; @@ -453,14 +423,8 @@ void fields::connect_the_chunks() { cni += pj->s->num_cinternal_notowned_needed(c, pj->data); } } - const size_t nn = (is_real ? 1 : 2) * (cni); - nc[f][ip][Incoming][i] += nn; - nc[f][ip][Outgoing][j] += nn; - comm_sizes[{f, ip, pair_j_to_i}] += nn; - const connect_phase iip = CONNECT_COPY; - nc[f][iip][Incoming][i] += ni; - nc[f][iip][Outgoing][j] += ni; - comm_sizes[{f, iip, pair_j_to_i}] += ni; + comm_sizes[{f, ip, pair_j_to_i}] += cni * num_reals_per_voxel; + comm_sizes[{f, CONNECT_COPY, pair_j_to_i}] += ni; } } // if is_mine and owns... } // loop over j chunks @@ -476,87 +440,90 @@ void fields::connect_the_chunks() { } } // loop over i chunks - /* Note that the ordering of the connections arrays must be kept - consistent with the fields::step_boundaries. In particular, we - must set up the connections array so that all of the connections - for process i come before all of the connections for process i' - for i < i' */ - - // wh stores the current indices in the connections array(s) - size_t *wh[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][2]; - - /* Now allocate the connection arrays... this is still slightly - wasteful (by a factor of 2) because we allocate for chunks we - don't own if we have a connection with them. Removing this waste - would mean a bunch more is_mine() checks below. */ - FOR_FIELD_TYPES(f) { - for (connect_phase ip : all_connect_phases) { - for (in_or_out io : all_in_or_out) { - for (int i = 0; i < num_chunks; i++) - chunks[i]->alloc_extra_connections(field_type(f), connect_phase(ip), in_or_out(io), - nc[f][ip][io][i]); - delete[] nc[f][ip][io]; - wh[f][ip][io] = new size_t[num_chunks]; - } - for (int i = 0; i < num_chunks; i++) - wh[f][ip][Outgoing][i] = 0; + // Preallocate all connection vectors. + for (const std::pair &key_and_comm_size : comm_sizes) { + const chunk_pair &pair_j_to_i = key_and_comm_size.first.pair; + if (chunks[pair_j_to_i.first]->is_mine()) { + chunks[pair_j_to_i.first]->connections_out[key_and_comm_size.first].reserve( + key_and_comm_size.second); + } + if (chunks[pair_j_to_i.second]->is_mine()) { + chunks[pair_j_to_i.second]->connections_in[key_and_comm_size.first].reserve( + key_and_comm_size.second); } } // Next start setting up the connections... - for (int i = 0; i < num_chunks; i++) { - const grid_volume vi = chunks[i]->gv; - - // initialize wh[f][ip][Incoming][j] to sum of comm_sizes for jj < j - FOR_FIELD_TYPES(f) { - for (connect_phase ip : all_connect_phases) { - wh[f][ip][Incoming][0] = 0; - for (int j = 1; j < num_chunks; ++j) - wh[f][ip][Incoming][j] = wh[f][ip][Incoming][j - 1] + get_comm_size({f, ip, {j - 1, i}}); - } - } + const grid_volume &vi = chunks[i]->gv; FOR_COMPONENTS(corig) { if (have_component(corig)) LOOP_OVER_VOL_NOTOWNED(vi, corig, n) { IVEC_LOOP_ILOC(vi, here); component c = corig; // We're looking at a border element... - complex thephase; - if (locate_component_point(&c, &here, &thephase) && !on_metal_boundary(here)) + std::complex thephase_double; + if (locate_component_point(&c, &here, &thephase_double) && !on_metal_boundary(here)) { + std::complex thephase(thephase_double.real(), thephase_double.imag()); for (int j = 0; j < num_chunks; j++) { - if ((chunks[i]->is_mine() || chunks[j]->is_mine()) && chunks[j]->gv.owns(here) && + const std::pair pair_j_to_i{j, i}; + const bool i_is_mine = chunks[i]->is_mine(); + const bool j_is_mine = chunks[j]->is_mine(); + if (!i_is_mine && !j_is_mine) { continue; } + + auto push_back_phase = [this, &thephase, &pair_j_to_i](field_type f) { + chunks[pair_j_to_i.second] + ->connection_phases[{f, CONNECT_PHASE, pair_j_to_i}] + .push_back(std::complex(thephase.real(), thephase.imag())); + }; + auto push_back_incoming_pointer = [this, &pair_j_to_i](field_type f, connect_phase ip, + realnum *p) { + chunks[pair_j_to_i.second]->connections_in[{f, ip, pair_j_to_i}].push_back(p); + }; + auto push_back_outgoing_pointer = [this, &pair_j_to_i](field_type f, connect_phase ip, + realnum *p) { + chunks[pair_j_to_i.first]->connections_out[{f, ip, pair_j_to_i}].push_back(p); + }; + + if (chunks[j]->gv.owns(here) && !(is_B(corig) && is_B(c) && B_redundant[5 * i + corig - Bx] && B_redundant[5 * j + c - Bx])) { - const connect_phase ip = thephase == 1.0 - ? CONNECT_COPY - : (thephase == -1.0 ? CONNECT_NEGATE : CONNECT_PHASE); + const connect_phase ip = + thephase == static_cast(1.0) + ? CONNECT_COPY + : (thephase == static_cast(-1.0) ? CONNECT_NEGATE : CONNECT_PHASE); const ptrdiff_t m = chunks[j]->gv.index(c, here); { field_type f = type(c); - if (ip == CONNECT_PHASE) - chunks[i]->connection_phases[f][wh[f][ip][Incoming][j] / 2] = thephase; - DOCMP { - chunks[i]->connections[f][ip][Incoming][wh[f][ip][Incoming][j]++] = - chunks[i]->f[corig][cmp] + n; - chunks[j]->connections[f][ip][Outgoing][wh[f][ip][Outgoing][j]++] = - chunks[j]->f[c][cmp] + m; + if (i_is_mine) { + if (ip == CONNECT_PHASE) { push_back_phase(f); } + DOCMP { push_back_incoming_pointer(f, ip, chunks[i]->f[corig][cmp] + n); } + } + if (j_is_mine) { + DOCMP { push_back_outgoing_pointer(f, ip, chunks[j]->f[c][cmp] + m); } } } if (needs_W_notowned[corig]) { field_type f = is_electric(corig) ? WE_stuff : WH_stuff; - if (ip == CONNECT_PHASE) - chunks[i]->connection_phases[f][wh[f][ip][Incoming][j] / 2] = thephase; - DOCMP { - chunks[i]->connections[f][ip][Incoming][wh[f][ip][Incoming][j]++] = - (chunks[i]->f_w[corig][cmp] ? chunks[i]->f_w[corig][cmp] - : chunks[i]->f[corig][cmp]) + - n; - chunks[j]->connections[f][ip][Outgoing][wh[f][ip][Outgoing][j]++] = - (chunks[j]->f_w[c][cmp] ? chunks[j]->f_w[c][cmp] : chunks[j]->f[c][cmp]) + - m; + if (i_is_mine) { + if (ip == CONNECT_PHASE) { push_back_phase(f); } + DOCMP { + push_back_incoming_pointer(f, ip, + (chunks[i]->f_w[corig][cmp] + ? chunks[i]->f_w[corig][cmp] + : chunks[i]->f[corig][cmp]) + + n); + } + } + if (j_is_mine) { + DOCMP { + push_back_outgoing_pointer( + f, ip, + (chunks[j]->f_w[c][cmp] ? chunks[j]->f_w[c][cmp] : chunks[j]->f[c][cmp]) + + m); + } } } @@ -574,21 +541,31 @@ void fields::connect_the_chunks() { const connect_phase iip = CONNECT_COPY; const size_t ni = po->s->num_internal_notowned_needed(corig, po->data); for (size_t k = 0; k < ni; ++k) { - chunks[i]->connections[f][iip][Incoming][wh[f][iip][Incoming][j]++] = - po->s->internal_notowned_ptr(k, corig, n, pi->data); - chunks[j]->connections[f][iip][Outgoing][wh[f][iip][Outgoing][j]++] = - po->s->internal_notowned_ptr(k, c, m, pj->data); + if (i_is_mine) { + push_back_incoming_pointer( + f, iip, po->s->internal_notowned_ptr(k, corig, n, pi->data)); + } + if (j_is_mine) { + push_back_outgoing_pointer( + f, iip, po->s->internal_notowned_ptr(k, c, m, pj->data)); + } } const size_t cni = po->s->num_cinternal_notowned_needed(corig, po->data); for (size_t k = 0; k < cni; ++k) { - if (ip == CONNECT_PHASE) - chunks[i]->connection_phases[f][wh[f][ip][Incoming][j] / 2] = - thephase; - DOCMP { - chunks[i]->connections[f][ip][Incoming][wh[f][ip][Incoming][j]++] = - po->s->cinternal_notowned_ptr(k, corig, cmp, n, pi->data); - chunks[j]->connections[f][ip][Outgoing][wh[f][ip][Outgoing][j]++] = - po->s->cinternal_notowned_ptr(k, c, cmp, m, pj->data); + if (i_is_mine) { + if (ip == CONNECT_PHASE) { push_back_phase(f); } + + DOCMP { + push_back_incoming_pointer( + f, ip, + po->s->cinternal_notowned_ptr(k, corig, cmp, n, pi->data)); + } + } + if (j_is_mine) { + DOCMP { + push_back_outgoing_pointer( + f, ip, po->s->cinternal_notowned_ptr(k, c, cmp, m, pj->data)); + } } } } @@ -596,14 +573,12 @@ void fields::connect_the_chunks() { } // is_electric(corig) } // if is_mine and owns... } // loop over j chunks + } // here in user_volume } // LOOP_OVER_VOL_NOTOWNED } // FOR_COMPONENTS } // loop over i chunks FOR_FIELD_TYPES(f) { - for (connect_phase ip : all_connect_phases) - for (int io = 0; io < 2; io++) - delete[] wh[f][ip][io]; // Calculate the sequence of sends and receives in advance. // Initiate receive operations as early as possible. @@ -645,23 +620,6 @@ void fields::connect_the_chunks() { comms_sequence_for_field[f] = optimize_comms_operations(operations); } - delete[] B_redundant; -} - -void fields_chunk::alloc_extra_connections(field_type f, connect_phase ip, in_or_out io, - size_t num) { - if (num == 0) return; // No need to go to any bother... - const size_t tot = num_connections[f][ip][io] + num; - if (io == Incoming && ip == CONNECT_PHASE) { - delete[] connection_phases[f]; - connection_phases[f] = new complex[tot]; - } - typedef realnum *realnum_ptr; - realnum **conn = new realnum_ptr[tot]; - if (!conn) meep::abort("Out of memory!\n"); - delete[] connections[f][ip][io]; - connections[f][ip][io] = conn; - num_connections[f][ip][io] = tot; } } // namespace meep diff --git a/src/fields.cpp b/src/fields.cpp index 9440cebf8..84e5d898b 100644 --- a/src/fields.cpp +++ b/src/fields.cpp @@ -176,12 +176,6 @@ fields_chunk::~fields_chunk() { delete[] f_cond_backup[c][cmp]; } delete[] f_rderiv_int; - FOR_FIELD_TYPES(ft) { - for (int ip = 0; ip < 3; ip++) - for (int io = 0; io < 2; io++) - delete[] connections[ft][ip][io]; - } - FOR_FIELD_TYPES(ft) { delete[] connection_phases[ft]; } while (dft_chunks) { dft_chunk *nxt = dft_chunks->next_in_chunk; delete dft_chunks; @@ -250,12 +244,6 @@ fields_chunk::fields_chunk(structure_chunk *the_s, const char *od, double m, dou } f_rderiv_int = NULL; FOR_FIELD_TYPES(ft) { - for (int ip = 0; ip < 3; ip++) - num_connections[ft][ip][Incoming] = num_connections[ft][ip][Outgoing] = 0; - connection_phases[ft] = 0; - for (int ip = 0; ip < 3; ip++) - for (int io = 0; io < 2; io++) - connections[ft][ip][io] = NULL; zeroes[ft] = NULL; num_zeroes[ft] = 0; } @@ -334,12 +322,6 @@ fields_chunk::fields_chunk(const fields_chunk &thef, int chunkidx) : gv(thef.gv) } } FOR_FIELD_TYPES(ft) { - for (int ip = 0; ip < 3; ip++) - num_connections[ft][ip][Incoming] = num_connections[ft][ip][Outgoing] = 0; - connection_phases[ft] = 0; - for (int ip = 0; ip < 3; ip++) - for (int io = 0; io < 2; io++) - connections[ft][ip][io] = NULL; zeroes[ft] = NULL; num_zeroes[ft] = 0; } diff --git a/src/meep.hpp b/src/meep.hpp index 1479ecfc4..66d365608 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -1438,9 +1438,10 @@ class fields_chunk { realnum **zeroes[NUM_FIELD_TYPES]; // Holds pointers to metal points. size_t num_zeroes[NUM_FIELD_TYPES]; - realnum **connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; - size_t num_connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; - std::complex *connection_phases[NUM_FIELD_TYPES]; + std::unordered_map, comms_key_hash_fn> connections_in; + std::unordered_map, comms_key_hash_fn> connections_out; + std::unordered_map >, comms_key_hash_fn> + connection_phases; int npol[NUM_FIELD_TYPES]; // only E_stuff and H_stuff are used polarization_state *pol[NUM_FIELD_TYPES]; // array of npol[i] polarization_state structures @@ -1542,8 +1543,6 @@ class fields_chunk { void initialize_field(component, std::complex f(const vec &)); void initialize_with_nth_te(int n, double kz); void initialize_with_nth_tm(int n, double kz); - // boundaries.cpp - void alloc_extra_connections(field_type, connect_phase, in_or_out, size_t); // dft.cpp void update_dfts(double timeE, double timeH, int current_step); diff --git a/src/step.cpp b/src/step.cpp index 615a55d3e..8bdf8a537 100644 --- a/src/step.cpp +++ b/src/step.cpp @@ -177,9 +177,7 @@ void fields::step_boundaries(field_type ft) { const auto &sequence = comms_sequence_for_field[ft]; for (const comms_operation &op : sequence.receive_ops) { - if (chunks[op.other_chunk_idx]->is_mine()) { - continue; - } + if (chunks[op.other_chunk_idx]->is_mine()) { continue; } manager->receive_real_async(comm_blocks[ft][op.pair_idx], static_cast(op.transfer_size), op.other_proc_id, op.tag); } @@ -188,38 +186,26 @@ void fields::step_boundaries(field_type ft) { for (int i = 0; i < num_chunks; i++) if (chunks[i]->is_mine()) chunks[i]->zero_metal(ft); - /* Note that the copying of data to/from buffers is order-sensitive, - and must be kept consistent with the code in boundaries.cpp. - In particular, we require that boundaries.cpp set up the connections - array so that all of the connections for process i come before all - of the connections for process i' for i < i' */ - // Copy outgoing data into buffers while following the predefined sequence of comms operations. // Trigger the asynchronous send immediately once the outgoing comms buffer has been filled. - using offset_array = std::array; - std::map connection_offset_by_chunk; am_now_working_on(Boundaries); for (const comms_operation &op : sequence.send_ops) { - if (!connection_offset_by_chunk.count(op.my_chunk_idx)) { - connection_offset_by_chunk.emplace(std::make_pair(op.my_chunk_idx, offset_array{})); - } const std::pair comm_pair{op.my_chunk_idx, op.other_chunk_idx}; const int pair_idx = op.pair_idx; - size_t n0 = 0; + realnum *outgoing_comm_block = comm_blocks[ft][pair_idx]; for (connect_phase ip : all_connect_phases) { - const size_t pair_comm_size = get_comm_size({ft, ip, comm_pair}); - ptrdiff_t &connection_offset = connection_offset_by_chunk[op.my_chunk_idx][ip]; + const comms_key key = {ft, ip, comm_pair}; + const size_t pair_comm_size = get_comm_size(key); + const std::vector &outgoing_connection = + chunks[op.my_chunk_idx]->connections_out[key]; for (size_t n = 0; n < pair_comm_size; ++n) { - comm_blocks[ft][pair_idx][n0 + n] = - *(chunks[op.my_chunk_idx]->connections[ft][ip][Outgoing][connection_offset++]); + outgoing_comm_block[n] = *(outgoing_connection[n]); } - n0 += pair_comm_size; - } - if (chunks[op.other_chunk_idx]->is_mine()) { - continue; + outgoing_comm_block += pair_comm_size; } + if (chunks[op.other_chunk_idx]->is_mine()) { continue; } manager->send_real_async(comm_blocks[ft][pair_idx], static_cast(op.transfer_size), op.other_proc_id, op.tag); } @@ -235,51 +221,45 @@ void fields::step_boundaries(field_type ft) { for (int i = 0; i < num_chunks; i++) { if (!chunks[i]->is_mine()) continue; - ptrdiff_t connection_phase_offset = 0; - ptrdiff_t negate_phase_offset = 0; - ptrdiff_t copy_phase_offset = 0; - const std::complex *connection_phase_for_ft = chunks[i]->connection_phases[ft]; - for (int j = 0; j < num_chunks; j++) { - const chunk_pair pair{j, i}; - const int pair_idx = chunk_pair_to_index(pair); + const chunk_pair comm_pair{j, i}; + const int pair_idx = chunk_pair_to_index(comm_pair); const realnum *pair_comm_block = static_cast(comm_blocks[ft][pair_idx]); { const std::complex *pair_comm_block_complex = reinterpret_cast *>(pair_comm_block); - const connect_phase ip = CONNECT_PHASE; - realnum **dst = chunks[i]->connections[ft][ip][Incoming]; - size_t num_transfers = get_comm_size({ft, ip, pair}) / 2; // Two realnums per complex + const comms_key key = {ft, CONNECT_PHASE, comm_pair}; + const std::vector &incoming_connection = chunks[i]->connections_in[key]; + const std::vector > &connection_phase_for_ft = + chunks[i]->connection_phases[key]; + size_t num_transfers = get_comm_size(key) / 2; // Two realnums per complex for (size_t n = 0; n < num_transfers; ++n) { - std::complex temp = connection_phase_for_ft[connection_phase_offset + n] * pair_comm_block_complex[n]; - *(dst[2*(connection_phase_offset + n)]) = temp.real(); - *(dst[2*(connection_phase_offset + n)+1]) = temp.imag(); + std::complex temp = connection_phase_for_ft[n] * pair_comm_block_complex[n]; + *(incoming_connection[2 * n]) = temp.real(); + *(incoming_connection[2 * n + 1]) = temp.imag(); } - connection_phase_offset += num_transfers; pair_comm_block += 2 * num_transfers; } { - const connect_phase ip = CONNECT_NEGATE; - const size_t num_transfers = get_comm_size({ft, ip, pair}); - realnum **dst = chunks[i]->connections[ft][ip][Incoming]; + const comms_key key = {ft, CONNECT_NEGATE, comm_pair}; + const std::vector &incoming_connection = chunks[i]->connections_in[key]; + const size_t num_transfers = get_comm_size(key); for (size_t n = 0; n < num_transfers; ++n) { - *(dst[negate_phase_offset + n]) = -pair_comm_block[n]; + *(incoming_connection[n]) = -pair_comm_block[n]; } - negate_phase_offset += num_transfers; pair_comm_block += num_transfers; } { - connect_phase ip = CONNECT_COPY; - const size_t num_transfers = get_comm_size({ft, ip, pair}); - realnum **dst = chunks[i]->connections[ft][ip][Incoming]; + const comms_key key = {ft, CONNECT_COPY, comm_pair}; + const std::vector &incoming_connection = chunks[i]->connections_in[key]; + const size_t num_transfers = get_comm_size(key); for (size_t n = 0; n < num_transfers; ++n) { - *(dst[copy_phase_offset + n]) = pair_comm_block[n]; + *(incoming_connection[n]) = pair_comm_block[n]; } - copy_phase_offset += num_transfers; } } } @@ -291,6 +271,7 @@ void fields::step_source(field_type ft, bool including_integrated) { for (int i = 0; i < num_chunks; i++) if (chunks[i]->is_mine()) chunks[i]->step_source(ft, including_integrated); } + void fields_chunk::step_source(field_type ft, bool including_integrated) { if (doing_solve_cw && !including_integrated) return; for (const src_vol &sv : sources[ft]) {