From c6aca70bc71b4021203748fbd639e39638ca936c Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 19 Jun 2020 16:50:18 -0400 Subject: [PATCH 1/7] cache epsilon computations for MPB to improve MPI scaling --- src/mpb.cpp | 90 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 24 deletions(-) diff --git a/src/mpb.cpp b/src/mpb.cpp index e4fa3875d..9d442af41 100644 --- a/src/mpb.cpp +++ b/src/mpb.cpp @@ -41,35 +41,57 @@ typedef struct { double frequency; ndim dim; const fields *f; + + /* for parallel efficiency, we first cache the epsinv data from each process, then + sum_to_all to synchonize, and then re-run set_maxwell_dielectric with the cached data */ + double *cache; // array of 6*ncache eps_inv matrix entries in the order that they are needed + size_t ncache; // allocated size of the cache + size_t icache; // current position in the cache + bool use_cache; // whether we are using the cache } meep_mpb_eps_data; static void meep_mpb_eps(symmetric_matrix *eps, symmetric_matrix *eps_inv, const mpb_real r[3], void *eps_data_) { meep_mpb_eps_data *eps_data = (meep_mpb_eps_data *)eps_data_; - const double *s = eps_data->s; - const double *o = eps_data->o; - double frequency = eps_data->frequency; - vec p(eps_data->dim == D3 ? vec(o[0] + r[0] * s[0], o[1] + r[1] * s[1], o[2] + r[2] * s[2]) - : (eps_data->dim == D2 ? vec(o[0] + r[0] * s[0], o[1] + r[1] * s[1]) : - /* D1 */ vec(o[2] + r[2] * s[2]))); - const fields *f = eps_data->f; - - eps_inv->m00 = real(f->get_chi1inv(Ex, X, p, frequency)); - eps_inv->m11 = real(f->get_chi1inv(Ey, Y, p, frequency)); - eps_inv->m22 = real(f->get_chi1inv(Ez, Z, p, frequency)); - - ASSIGN_ESCALAR(eps_inv->m01, real(f->get_chi1inv(Ex, Y, p, frequency)), 0); - ASSIGN_ESCALAR(eps_inv->m02, real(f->get_chi1inv(Ex, Z, p, frequency)), 0); - ASSIGN_ESCALAR(eps_inv->m12, real(f->get_chi1inv(Ey, Z, p, frequency)), 0); - /* - master_printf("m11(%g,%g) = %g\n", p.x(), p.y(), eps_inv->m00); - master_printf("m22(%g,%g) = %g\n", p.x(), p.y(), eps_inv->m11); - master_printf("m33(%g,%g) = %g\n", p.x(), p.y(), eps_inv->m22); - master_printf("m12(%g,%g) = %g\n", p.x(), p.y(), eps_inv->m01); - master_printf("m13(%g,%g) = %g\n", p.x(), p.y(), eps_inv->m02); - master_printf("m23(%g,%g) = %g\n", p.x(), p.y(), eps_inv->m12); - */ - maxwell_sym_matrix_invert(eps, eps_inv); + size_t i = eps_data->icache; + + if (eps_data->use_cache) { + const double *cache = eps_data->cache; + eps_inv->m00 = cache[6*i]; + eps_inv->m11 = cache[6*i+1]; + eps_inv->m22 = cache[6*i+2]; + ASSIGN_ESCALAR(eps_inv->m01, cache[6*i+3], 0); + ASSIGN_ESCALAR(eps_inv->m02, cache[6*i+4], 0); + ASSIGN_ESCALAR(eps_inv->m12, cache[6*i+5], 0); + maxwell_sym_matrix_invert(eps, eps_inv); + } + else { + // get cache pointer, doubling cache size as needed: + double *cache = eps_data->icache < eps_data->ncache ? eps_data->cache : + (eps_data->cache = (double*) realloc(eps_data->cache, eps_data->ncache *= 2)); + + const double *s = eps_data->s; + const double *o = eps_data->o; + double frequency = eps_data->frequency; + vec p(eps_data->dim == D3 ? vec(o[0] + r[0] * s[0], o[1] + r[1] * s[1], o[2] + r[2] * s[2]) + : (eps_data->dim == D2 ? vec(o[0] + r[0] * s[0], o[1] + r[1] * s[1]) : + /* D1 */ vec(o[2] + r[2] * s[2]))); + const fields *f = eps_data->f; + + // call get_chi1inv with parallel=false to get only local epsilon data + cache[6*i] = real(f->get_chi1inv(Ex, X, p, frequency, false)); + cache[6*i+2] = real(f->get_chi1inv(Ey, Y, p, frequency, false)); + cache[6*i+3] = real(f->get_chi1inv(Ez, Z, p, frequency, false)); + cache[6*i+4] = real(f->get_chi1inv(Ex, Y, p, frequency, false)); + cache[6*i+5] = real(f->get_chi1inv(Ex, Z, p, frequency, false)); + cache[6*i+6] = real(f->get_chi1inv(Ey, Z, p, frequency, false)); + + // return a dummy value epsilon = 1 while we are building up the cache + eps->m00 = eps->m11 = eps->m22 = eps_inv->m00 = eps_inv->m11 = eps_inv->m22 = 1; + ASSIGN_ESCALAR(eps->m01, 0, 0); + eps->m02 = eps->m12 = eps_inv->m01 = eps_inv->m02 = eps_inv->m12 = eps->m01; + } + eps_data->icache += 1; } /**************************************************************/ @@ -382,7 +404,27 @@ void *fields::get_eigenmode(double frequency, direction d, const volume where, c eps_data.dim = gv.dim; eps_data.f = this; eps_data.frequency = frequency; + eps_data.cache = (double *) malloc(sizeof(double) * 6 * (eps_data.ncache = 512)); + + // first, build up a cache of the local epsilon data (while returning dummy values to MPB) + eps_data.use_cache = false; + eps_data.icache = 0; set_maxwell_dielectric(mdata, mesh_size, R, G, meep_mpb_eps, NULL, &eps_data); + + // then, synchronize the data + eps_data.ncache = eps_data.icache; // actual amount of cached data + double *summed_cache = (double *) malloc(sizeof(double) * 6 * eps_data.ncache); + sum_to_all(eps_data.cache, summed_cache, eps_data.ncache); + free(eps_data.cache); + eps_data.cache = summed_cache; + + // finally, send MPB the real epsilon data using the synchronized cache + eps_data.use_cache = true; + eps_data.icache = 0; + set_maxwell_dielectric(mdata, mesh_size, R, G, meep_mpb_eps, NULL, &eps_data); + + free(eps_data.cache); + if (user_mdata) *user_mdata = (void *)mdata; } else { From 158882abb2998c49a8042e17063cc6b525da4a71 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 19 Jun 2020 16:53:39 -0400 Subject: [PATCH 2/7] whoops --- src/mpb.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mpb.cpp b/src/mpb.cpp index 9d442af41..9eed5e509 100644 --- a/src/mpb.cpp +++ b/src/mpb.cpp @@ -68,7 +68,7 @@ static void meep_mpb_eps(symmetric_matrix *eps, symmetric_matrix *eps_inv, const else { // get cache pointer, doubling cache size as needed: double *cache = eps_data->icache < eps_data->ncache ? eps_data->cache : - (eps_data->cache = (double*) realloc(eps_data->cache, eps_data->ncache *= 2)); + (eps_data->cache = (double*) realloc(eps_data->cache, 6 * (eps_data->ncache *= 2))); const double *s = eps_data->s; const double *o = eps_data->o; From 6d4d7720feaa9a4449b1fc7be16b423f7de8860f Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 19 Jun 2020 16:55:02 -0400 Subject: [PATCH 3/7] tweak --- src/mpb.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mpb.cpp b/src/mpb.cpp index 9eed5e509..7c38486de 100644 --- a/src/mpb.cpp +++ b/src/mpb.cpp @@ -67,7 +67,7 @@ static void meep_mpb_eps(symmetric_matrix *eps, symmetric_matrix *eps_inv, const } else { // get cache pointer, doubling cache size as needed: - double *cache = eps_data->icache < eps_data->ncache ? eps_data->cache : + double *cache = i < eps_data->ncache ? eps_data->cache : (eps_data->cache = (double*) realloc(eps_data->cache, 6 * (eps_data->ncache *= 2))); const double *s = eps_data->s; @@ -91,7 +91,8 @@ static void meep_mpb_eps(symmetric_matrix *eps, symmetric_matrix *eps_inv, const ASSIGN_ESCALAR(eps->m01, 0, 0); eps->m02 = eps->m12 = eps_inv->m01 = eps_inv->m02 = eps_inv->m12 = eps->m01; } - eps_data->icache += 1; + + eps_data->icache += 1; // next call will use the subsequent cache element } /**************************************************************/ From ddb2337f5d86a22fa826cfca5cadd20f9a58f6c6 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 19 Jun 2020 22:19:42 -0400 Subject: [PATCH 4/7] fixes --- src/mpb.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mpb.cpp b/src/mpb.cpp index 7c38486de..d3deab283 100644 --- a/src/mpb.cpp +++ b/src/mpb.cpp @@ -80,11 +80,11 @@ static void meep_mpb_eps(symmetric_matrix *eps, symmetric_matrix *eps_inv, const // call get_chi1inv with parallel=false to get only local epsilon data cache[6*i] = real(f->get_chi1inv(Ex, X, p, frequency, false)); - cache[6*i+2] = real(f->get_chi1inv(Ey, Y, p, frequency, false)); - cache[6*i+3] = real(f->get_chi1inv(Ez, Z, p, frequency, false)); - cache[6*i+4] = real(f->get_chi1inv(Ex, Y, p, frequency, false)); - cache[6*i+5] = real(f->get_chi1inv(Ex, Z, p, frequency, false)); - cache[6*i+6] = real(f->get_chi1inv(Ey, Z, p, frequency, false)); + cache[6*i+1] = real(f->get_chi1inv(Ey, Y, p, frequency, false)); + cache[6*i+2] = real(f->get_chi1inv(Ez, Z, p, frequency, false)); + cache[6*i+3] = real(f->get_chi1inv(Ex, Y, p, frequency, false)); + cache[6*i+4] = real(f->get_chi1inv(Ex, Z, p, frequency, false)); + cache[6*i+5] = real(f->get_chi1inv(Ey, Z, p, frequency, false)); // return a dummy value epsilon = 1 while we are building up the cache eps->m00 = eps->m11 = eps->m22 = eps_inv->m00 = eps_inv->m11 = eps_inv->m22 = 1; @@ -415,7 +415,7 @@ void *fields::get_eigenmode(double frequency, direction d, const volume where, c // then, synchronize the data eps_data.ncache = eps_data.icache; // actual amount of cached data double *summed_cache = (double *) malloc(sizeof(double) * 6 * eps_data.ncache); - sum_to_all(eps_data.cache, summed_cache, eps_data.ncache); + sum_to_all(eps_data.cache, summed_cache, eps_data.ncache*6); free(eps_data.cache); eps_data.cache = summed_cache; From 85981c2ef072a9a38d9bdf1a786364ef1a34859b Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 23 Jun 2020 21:18:25 -0400 Subject: [PATCH 5/7] add missing sizeof --- src/mpb.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mpb.cpp b/src/mpb.cpp index d3deab283..79f468033 100644 --- a/src/mpb.cpp +++ b/src/mpb.cpp @@ -68,7 +68,7 @@ static void meep_mpb_eps(symmetric_matrix *eps, symmetric_matrix *eps_inv, const else { // get cache pointer, doubling cache size as needed: double *cache = i < eps_data->ncache ? eps_data->cache : - (eps_data->cache = (double*) realloc(eps_data->cache, 6 * (eps_data->ncache *= 2))); + (eps_data->cache = (double*) realloc(eps_data->cache, sizeof(double) * 6 * (eps_data->ncache *= 2))); const double *s = eps_data->s; const double *o = eps_data->o; From 98ea4e4647bc18bb9bad9c42cc70b5c7b0869eb4 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 23 Jun 2020 21:34:59 -0400 Subject: [PATCH 6/7] tell MPB not to do its own subpixel averaging --- src/mpb.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/mpb.cpp b/src/mpb.cpp index 79f468033..207ef3687 100644 --- a/src/mpb.cpp +++ b/src/mpb.cpp @@ -50,11 +50,15 @@ typedef struct { bool use_cache; // whether we are using the cache } meep_mpb_eps_data; -static void meep_mpb_eps(symmetric_matrix *eps, symmetric_matrix *eps_inv, const mpb_real r[3], +static int meep_mpb_eps(symmetric_matrix *eps, symmetric_matrix *eps_inv, + mpb_real n[3], mpb_real d1, mpb_real d2, mpb_real d3, mpb_real tol, + const mpb_real r[3], void *eps_data_) { meep_mpb_eps_data *eps_data = (meep_mpb_eps_data *)eps_data_; size_t i = eps_data->icache; + (void) n; (void) d1; (void) d2; (void) d3; (void) tol; // unused + if (eps_data->use_cache) { const double *cache = eps_data->cache; eps_inv->m00 = cache[6*i]; @@ -93,6 +97,8 @@ static void meep_mpb_eps(symmetric_matrix *eps, symmetric_matrix *eps_inv, const } eps_data->icache += 1; // next call will use the subsequent cache element + + return 1; // tells MPB not to do its own subpixel averaging } /**************************************************************/ @@ -410,7 +416,7 @@ void *fields::get_eigenmode(double frequency, direction d, const volume where, c // first, build up a cache of the local epsilon data (while returning dummy values to MPB) eps_data.use_cache = false; eps_data.icache = 0; - set_maxwell_dielectric(mdata, mesh_size, R, G, meep_mpb_eps, NULL, &eps_data); + set_maxwell_dielectric(mdata, mesh_size, R, G, NULL, meep_mpb_eps, &eps_data); // then, synchronize the data eps_data.ncache = eps_data.icache; // actual amount of cached data @@ -422,7 +428,8 @@ void *fields::get_eigenmode(double frequency, direction d, const volume where, c // finally, send MPB the real epsilon data using the synchronized cache eps_data.use_cache = true; eps_data.icache = 0; - set_maxwell_dielectric(mdata, mesh_size, R, G, meep_mpb_eps, NULL, &eps_data); + set_maxwell_dielectric(mdata, mesh_size, R, G, NULL, meep_mpb_eps, &eps_data); + assert(eps_data.icache == eps_data.ncache); free(eps_data.cache); From 32893d309bff92ae1e63a36a970c50a90572a7c3 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 24 Jun 2020 22:13:13 -0400 Subject: [PATCH 7/7] assert.h --- src/mpb.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mpb.cpp b/src/mpb.cpp index 207ef3687..8da619eaf 100644 --- a/src/mpb.cpp +++ b/src/mpb.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include "meep.hpp" #include "config.h"