Skip to content

Commit

Permalink
fixes and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
oskooi committed Sep 15, 2021
1 parent bbdcba2 commit 57b4359
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 69 deletions.
14 changes: 8 additions & 6 deletions doc/docs/Python_User_Interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ def __init__(self,
progress_interval=4,
subpixel_tol=0.0001,
subpixel_maxeval=100000,
loop_tile_base=0,
loop_tile_base_db=0,
loop_tile_base_eh=0,
ensure_periodicity=True,
num_chunks=0,
Courant=0.5,
Expand Down Expand Up @@ -286,11 +287,12 @@ Python. `Vector3` is a `meep` class.
the minimum refractive index (usually 1), and in practice $S$ should be slightly
smaller.

+ **`loop_tile_base` [`number`]** — To improve the memory locality of the step-curl
field updates, Meep has an experimental feature to "tile" the loop over the Yee grid
voxels. The splitting of this loop into tiles or subdomains involves a recursive-bisection
method in which the base case for the number of voxels is specified using this parameter.
The default value is 0 or no tiling; a typical nonzero value to try would be `10000`.
+ **`loop_tile_base_db`, `loop_tile_base_eh` [`number`]** — To improve the [memory locality](https://en.wikipedia.org/wiki/Locality_of_reference)
of the field updates, Meep has an experimental feature to "tile" the loops over the Yee grid
voxels. The splitting of the update loops for step-curl and update-eh into tiles or subdomains
involves a recursive-bisection method in which the base case for the number of voxels is
specified using these two parameters, respectively. The default value is 0 or no tiling;
a typical nonzero value to try would be 10000.

+ **`output_volume` [`Volume` class ]** — Specifies the default region of space
that is output by the HDF5 output functions (below); see also the `Volume` class
Expand Down
11 changes: 6 additions & 5 deletions python/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,11 +1130,12 @@ def __init__(self,
the minimum refractive index (usually 1), and in practice $S$ should be slightly
smaller.
+ **`loop_tile_base` [`number`]** — To improve the memory locality of the step-curl
field updates, Meep has an experimental feature to "tile" the loop over the Yee grid
voxels. The splitting of this loop into tiles or subdomains involves a recursive-bisection
method in which the base case for the number of voxels is specified using this parameter.
The default value is 0 or no tiling; a typical nonzero value to try would be 10000.
+ **`loop_tile_base_db`, `loop_tile_base_eh` [`number`]** — To improve the [memory locality](https://en.wikipedia.org/wiki/Locality_of_reference)
of the field updates, Meep has an experimental feature to "tile" the loops over the Yee grid
voxels. The splitting of the update loops for step-curl and update-eh into tiles or subdomains
involves a recursive-bisection method in which the base case for the number of voxels is
specified using these two parameters, respectively. The default value is 0 or no tiling;
a typical nonzero value to try would be 10000.
+ **`output_volume` [`Volume` class ]** — Specifies the default region of space
that is output by the HDF5 output functions (below); see also the `Volume` class
Expand Down
48 changes: 44 additions & 4 deletions src/fields.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ fields::fields(structure *s, double m, double beta, bool zero_fields_near_cylori
chunks = new fields_chunk_ptr[num_chunks];
for (int i = 0; i < num_chunks; i++)
chunks[i] = new fields_chunk(s->chunks[i], outdir, m, beta,
zero_fields_near_cylorigin, i);
zero_fields_near_cylorigin, i, loop_tile_base_db);
FOR_FIELD_TYPES(ft) {
typedef realnum *realnum_ptr;
comm_blocks[ft] = new realnum_ptr[num_chunks * num_chunks];
Expand Down Expand Up @@ -199,8 +199,40 @@ fields_chunk::~fields_chunk() {
if (new_s && new_s->refcount-- <= 1) delete new_s; // delete if not shared
}

void split_into_tiles(grid_volume gvol, std::vector<grid_volume> *result,
const size_t loop_tile_base) {
if (gvol.nowned_min() < loop_tile_base) {
result->push_back(gvol);
return;
}

int best_split_point;
direction best_split_direction;
gvol.tile_split(best_split_point, best_split_direction);
grid_volume left_gvol = gvol.split_at_fraction(false, best_split_point, best_split_direction);
split_into_tiles(left_gvol, result, loop_tile_base);
grid_volume right_gvol = gvol.split_at_fraction(true, best_split_point, best_split_direction);
split_into_tiles(right_gvol, result, loop_tile_base);
return;
}

// First check that the tile volumes gvs do not intersect and that they add
// up to the chunk's total grid_volume gv
void check_tiles(grid_volume gv, const std::vector<grid_volume> &gvs) {
grid_volume vol_intersection;
for (size_t i = 0; i < gvs.size(); i++)
for (size_t j = i + 1; j < gvs.size(); j++)
if (gvs[i].intersect_with(gvs[j], &vol_intersection))
meep::abort("gvs[%zu] intersects with gvs[%zu]\n", i, j);
size_t sum = 0;
for (const auto& sub_gv : gvs) { sum += sub_gv.nowned_min(); }
size_t v_grid_points = 1;
LOOP_OVER_DIRECTIONS(gv.dim, d) { v_grid_points *= gv.num_direction(d); }
if (sum != v_grid_points) meep::abort("v_grid_points = %zu, sum(tiles) = %zu\n", v_grid_points, sum);
}

fields_chunk::fields_chunk(structure_chunk *the_s, const char *od, double m, double beta,
bool zero_fields_near_cylorigin, int chunkidx)
bool zero_fields_near_cylorigin, int chunkidx, int loop_tile_base_db)
: gv(the_s->gv), v(the_s->v), m(m), zero_fields_near_cylorigin(zero_fields_near_cylorigin),
beta(beta) {
s = the_s;
Expand All @@ -213,6 +245,12 @@ fields_chunk::fields_chunk(structure_chunk *the_s, const char *od, double m, dou
Courant = s->Courant;
dt = s->dt;
dft_chunks = NULL;
if (loop_tile_base_db > 0) {
split_into_tiles(gv, &gvs_tiled, loop_tile_base_db);
check_tiles(gv, gvs_tiled);
} else {
gvs_tiled.push_back(gv);
}
FOR_FIELD_TYPES(ft) {
polarization_state *cur = NULL;
pol[ft] = NULL;
Expand Down Expand Up @@ -268,8 +306,10 @@ fields_chunk::fields_chunk(const fields_chunk &thef, int chunkidx) : gv(thef.gv)
Courant = thef.Courant;
dt = thef.dt;
dft_chunks = NULL;
gvs_db = thef.gvs_db;
gvs_eh = thef.gvs_eh;
gvs_tiled = thef.gvs_tiled;
FOR_FIELD_TYPES(ft) {
gvs_eh[ft] = thef.gvs_eh[ft];
}
FOR_FIELD_TYPES(ft) {
polarization_state *cur = NULL;
for (polarization_state *ocur = thef.pol[ft]; ocur; ocur = ocur->next) {
Expand Down
4 changes: 2 additions & 2 deletions src/meep.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1467,7 +1467,7 @@ class fields_chunk {

double a, Courant, dt; // resolution a, Courant number, and timestep dt=Courant/a
grid_volume gv;
std::vector<grid_volume> gvs_db, gvs_eh; // subdomains for tiled execution of step_db and update_eh
std::vector<grid_volume> gvs_tiled, gvs_eh[NUM_FIELD_TYPES]; // subdomains for tiled execution
volume v;
double m; // angular dependence in cyl. coords
bool zero_fields_near_cylorigin; // fields=0 m pixels near r=0 for stability
Expand All @@ -1480,7 +1480,7 @@ class fields_chunk {
int chunk_idx;

fields_chunk(structure_chunk *, const char *outdir, double m, double beta,
bool zero_fields_near_cylorigin, int chunkidx);
bool zero_fields_near_cylorigin, int chunkidx, int loop_tile_base_db);

fields_chunk(const fields_chunk &, int chunkidx);
~fields_chunk();
Expand Down
46 changes: 1 addition & 45 deletions src/step_db.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,53 +30,9 @@ using namespace std;

namespace meep {

void split_into_tiles(grid_volume gvol, std::vector<grid_volume> *result,
const size_t loop_tile_base) {
if (gvol.nowned_min() < loop_tile_base) {
result->push_back(gvol);
return;
}

int best_split_point;
direction best_split_direction;
gvol.tile_split(best_split_point, best_split_direction);
grid_volume left_gvol = gvol.split_at_fraction(false, best_split_point, best_split_direction);
split_into_tiles(left_gvol, result, loop_tile_base);
grid_volume right_gvol = gvol.split_at_fraction(true, best_split_point, best_split_direction);
split_into_tiles(right_gvol, result, loop_tile_base);
return;
}

// First check that the tile volumes gvs do not intersect and that they add
// up to the chunk's total grid_volume gv
void check_tiles(grid_volume gv, const std::vector<grid_volume> &gvs) {
grid_volume vol_intersection;
for (size_t i = 0; i < gvs.size(); i++)
for (size_t j = i + 1; j < gvs.size(); j++)
if (gvs[i].intersect_with(gvs[j], &vol_intersection))
meep::abort("gvs[%zu] intersects with gvs[%zu]\n", i, j);
size_t sum = 0;
for (const auto& sub_gv : gvs) { sum += sub_gv.nowned_min(); }
size_t v_grid_points = 1;
LOOP_OVER_DIRECTIONS(gv.dim, d) { v_grid_points *= gv.num_direction(d); }
if (sum != v_grid_points) meep::abort("v_grid_points = %zu, sum(tiles) = %zu\n", v_grid_points, sum);
}

void fields::step_db(field_type ft) {
if (ft != B_stuff && ft != D_stuff) meep::abort("step_db only works with B/D");

// split the chunk volume into subdomains for tiled execution of step_db loop
for (int i = 0; i < num_chunks; i++)
if (chunks[i]->is_mine() && changed_materials) {
if (!chunks[i]->gvs_db.empty()) chunks[i]->gvs_db.clear();
if (loop_tile_base_db > 0) {
split_into_tiles(chunks[i]->gv, &chunks[i]->gvs_db, loop_tile_base_db);
check_tiles(chunks[i]->gv, chunks[i]->gvs_db);
} else {
chunks[i]->gvs_db.push_back(chunks[i]->gv);
}
}

for (int i = 0; i < num_chunks; i++)
if (chunks[i]->is_mine())
if (chunks[i]->step_db(ft)) {
Expand All @@ -88,7 +44,7 @@ void fields::step_db(field_type ft) {
bool fields_chunk::step_db(field_type ft) {
bool allocated_u = false;

for (const auto& sub_gv : gvs_db) {
for (const auto& sub_gv : gvs_tiled) {
DOCMP FOR_FT_COMPONENTS(ft, cc) {
if (f[cc][cmp]) {
const component c_p = plus_component[cc], c_m = minus_component[cc];
Expand Down
15 changes: 8 additions & 7 deletions src/update_eh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ namespace meep {

void fields::update_eh(field_type ft, bool skip_w_components) {
if (ft != E_stuff && ft != H_stuff) meep::abort("update_eh only works with E/H");
// split the chunk volume into subdomains for tiled execution of update_eh loop

// split the chunks' volume into subdomains for tiled execution of update_eh loop
for (int i = 0; i < num_chunks; i++)
if (chunks[i]->is_mine() && changed_materials) {
bool is_aniso = false;
Expand All @@ -40,12 +41,12 @@ void fields::update_eh(field_type ft, bool skip_w_components) {
break;
}
}
if (!chunks[i]->gvs_eh.empty()) chunks[i]->gvs_eh.clear();
if (!chunks[i]->gvs_eh[ft].empty()) chunks[i]->gvs_eh[ft].clear();
if (loop_tile_base_eh > 0 && is_aniso) {
split_into_tiles(chunks[i]->gv, &chunks[i]->gvs_eh, loop_tile_base_eh);
check_tiles(chunks[i]->gv, chunks[i]->gvs_eh);
split_into_tiles(chunks[i]->gv, &chunks[i]->gvs_eh[ft], loop_tile_base_eh);
check_tiles(chunks[i]->gv, chunks[i]->gvs_eh[ft]);
} else {
chunks[i]->gvs_eh.push_back(chunks[i]->gv);
chunks[i]->gvs_eh[ft].push_back(chunks[i]->gv);
}
}

Expand Down Expand Up @@ -145,7 +146,7 @@ bool fields_chunk::update_eh(field_type ft, bool skip_w_components) {
dmp[dc][cmp] = f_minus_p[dc][cmp] ? f_minus_p[dc][cmp] : f[dc][cmp];
}

for (size_t i = 0; i < gvs_eh.size(); ++i) {
for (size_t i = 0; i < gvs_eh[ft].size(); ++i) {
DOCMP FOR_FT_COMPONENTS(ft, ec) {
if (f[ec][cmp]) {
if (type(ec) != ft) meep::abort("bug in FOR_FT_COMPONENTS");
Expand Down Expand Up @@ -188,7 +189,7 @@ bool fields_chunk::update_eh(field_type ft, bool skip_w_components) {
}

if (f[ec][cmp] != f[dc][cmp])
STEP_UPDATE_EDHB(f[ec][cmp], ec, gv, gvs_eh[i].little_owned_corner(ec), gvs_eh[i].big_corner(),
STEP_UPDATE_EDHB(f[ec][cmp], ec, gv, gvs_eh[ft][i].little_owned_corner(ec), gvs_eh[ft][i].big_corner(),
dmp[dc][cmp], dmp[dc_1][cmp], dmp[dc_2][cmp],
s->chi1inv[ec][d_ec], dmp[dc_1][cmp] ? s->chi1inv[ec][d_1] : NULL,
dmp[dc_2][cmp] ? s->chi1inv[ec][d_2] : NULL, s_ec, s_1, s_2, s->chi2[ec],
Expand Down

0 comments on commit 57b4359

Please sign in to comment.