Skip to content

Commit

Permalink
loop tiling for STEP_UPDATE_EDHB
Browse files Browse the repository at this point in the history
  • Loading branch information
oskooi committed Aug 16, 2021
1 parent a523a87 commit dc926c2
Showing 1 changed file with 45 additions and 43 deletions.
88 changes: 45 additions & 43 deletions src/update_eh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,53 +123,55 @@ bool fields_chunk::update_eh(field_type ft, bool skip_w_components) {
dmp[dc][cmp] = f_minus_p[dc][cmp] ? f_minus_p[dc][cmp] : f[dc][cmp];
}

DOCMP FOR_FT_COMPONENTS(ft, ec) {
if (f[ec][cmp]) {
if (type(ec) != ft) meep::abort("bug in FOR_FT_COMPONENTS");
component dc = field_type_component(ft2, ec);
const direction d_ec = component_direction(ec);
const ptrdiff_t s_ec = gv.stride(d_ec) * (ft == H_stuff ? -1 : +1);
const direction d_1 = cycle_direction(gv.dim, d_ec, 1);
const component dc_1 = direction_component(dc, d_1);
const ptrdiff_t s_1 = gv.stride(d_1) * (ft == H_stuff ? -1 : +1);
const direction d_2 = cycle_direction(gv.dim, d_ec, 2);
const component dc_2 = direction_component(dc, d_2);
const ptrdiff_t s_2 = gv.stride(d_2) * (ft == H_stuff ? -1 : +1);

direction dsigw0 = d_ec;
direction dsigw = s->sigsize[dsigw0] > 1 ? dsigw0 : NO_DIRECTION;

// lazily allocate any E/H fields that are needed (H==B initially)
if (f[ec][cmp] == f[dc][cmp] &&
(s->chi1inv[ec][d_ec] || have_f_minus_p || dsigw != NO_DIRECTION)) {
f[ec][cmp] = new realnum[gv.ntot()];
memcpy(f[ec][cmp], f[dc][cmp], gv.ntot() * sizeof(realnum));
allocated_eh = true;
}
for (const auto& sub_gv : gvs) {
DOCMP FOR_FT_COMPONENTS(ft, ec) {
if (f[ec][cmp]) {
if (type(ec) != ft) meep::abort("bug in FOR_FT_COMPONENTS");
component dc = field_type_component(ft2, ec);
const direction d_ec = component_direction(ec);
const ptrdiff_t s_ec = gv.stride(d_ec) * (ft == H_stuff ? -1 : +1);
const direction d_1 = cycle_direction(gv.dim, d_ec, 1);
const component dc_1 = direction_component(dc, d_1);
const ptrdiff_t s_1 = gv.stride(d_1) * (ft == H_stuff ? -1 : +1);
const direction d_2 = cycle_direction(gv.dim, d_ec, 2);
const component dc_2 = direction_component(dc, d_2);
const ptrdiff_t s_2 = gv.stride(d_2) * (ft == H_stuff ? -1 : +1);

direction dsigw0 = d_ec;
direction dsigw = s->sigsize[dsigw0] > 1 ? dsigw0 : NO_DIRECTION;

// lazily allocate any E/H fields that are needed (H==B initially)
if (f[ec][cmp] == f[dc][cmp] &&
(s->chi1inv[ec][d_ec] || have_f_minus_p || dsigw != NO_DIRECTION)) {
f[ec][cmp] = new realnum[gv.ntot()];
memcpy(f[ec][cmp], f[dc][cmp], gv.ntot() * sizeof(realnum));
allocated_eh = true;
}

// lazily allocate W auxiliary field
if (!f_w[ec][cmp] && dsigw != NO_DIRECTION) {
f_w[ec][cmp] = new realnum[gv.ntot()];
memcpy(f_w[ec][cmp], f[ec][cmp], gv.ntot() * sizeof(realnum));
if (needs_W_notowned(ec)) allocated_eh = true; // communication needed
}
// lazily allocate W auxiliary field
if (!f_w[ec][cmp] && dsigw != NO_DIRECTION) {
f_w[ec][cmp] = new realnum[gv.ntot()];
memcpy(f_w[ec][cmp], f[ec][cmp], gv.ntot() * sizeof(realnum));
if (needs_W_notowned(ec)) allocated_eh = true; // communication needed
}

// for solve_cw, when W exists we get W and E from special variables
if (f_w[ec][cmp] && skip_w_components) continue;
// for solve_cw, when W exists we get W and E from special variables
if (f_w[ec][cmp] && skip_w_components) continue;

// save W field from this timestep in f_w_prev if needed by pols
if (needs_W_prev(ec)) {
if (!f_w_prev[ec][cmp]) f_w_prev[ec][cmp] = new realnum[gv.ntot()];
memcpy(f_w_prev[ec][cmp], f_w[ec][cmp] ? f_w[ec][cmp] : f[ec][cmp],
sizeof(realnum) * gv.ntot());
}
// save W field from this timestep in f_w_prev if needed by pols
if (needs_W_prev(ec)) {
if (!f_w_prev[ec][cmp]) f_w_prev[ec][cmp] = new realnum[gv.ntot()];
memcpy(f_w_prev[ec][cmp], f_w[ec][cmp] ? f_w[ec][cmp] : f[ec][cmp],
sizeof(realnum) * gv.ntot());
}

if (f[ec][cmp] != f[dc][cmp])
STEP_UPDATE_EDHB(f[ec][cmp], ec, gv, gv.little_owned_corner(ec), gv.big_corner(),
dmp[dc][cmp], dmp[dc_1][cmp], dmp[dc_2][cmp],
s->chi1inv[ec][d_ec], dmp[dc_1][cmp] ? s->chi1inv[ec][d_1] : NULL,
dmp[dc_2][cmp] ? s->chi1inv[ec][d_2] : NULL, s_ec, s_1, s_2, s->chi2[ec],
s->chi3[ec], f_w[ec][cmp], dsigw, s->sig[dsigw], s->kap[dsigw]);
if (f[ec][cmp] != f[dc][cmp])
STEP_UPDATE_EDHB(f[ec][cmp], ec, gv, sub_gv.little_owned_corner(ec), sub_gv.big_corner(),
dmp[dc][cmp], dmp[dc_1][cmp], dmp[dc_2][cmp],
s->chi1inv[ec][d_ec], dmp[dc_1][cmp] ? s->chi1inv[ec][d_1] : NULL,
dmp[dc_2][cmp] ? s->chi1inv[ec][d_2] : NULL, s_ec, s_1, s_2, s->chi2[ec],
s->chi3[ec], f_w[ec][cmp], dsigw, s->sig[dsigw], s->kap[dsigw]);
}
}
}

Expand Down

0 comments on commit dc926c2

Please sign in to comment.