Subpixel smoothing gradients (#1780)

* reset to include off-diag components * get subpixel gradients working and add ability to chance step size * forgot to update the wrapper api too * change material grid smoothing default * fix test_material_grid flags to be consistent with new default * reduce FD stepsize of DFT tests for single precision * reduce A_u FD step size * minor fixes to comments and var names Co-authored-by: Alec Hammond <ahammond36@gatech.edu>
NanoComp · Nov 17, 2021 · 03ea7ad · 03ea7ad
1 parent ce4d151
commit 03ea7ad
Show file tree

Hide file tree

Showing 9 changed files with 97 additions and 50 deletions.
diff --git a/python/adjoint/optimization_problem.py b/python/adjoint/optimization_problem.py
@@ -31,7 +31,8 @@ def __init__(
         decay_by=1e-11,
         decimation_factor=0,
         minimum_run_time=0,
-        maximum_run_time=None
+        maximum_run_time=None,
+        finite_difference_step=utils.FD_DEFAULT
     ):
 
         self.sim = simulation
@@ -87,6 +88,7 @@ def __init__(
         self.decimation_factor = decimation_factor
         self.minimum_run_time = minimum_run_time
         self.maximum_run_time = maximum_run_time
+        self.finite_difference_step = finite_difference_step # step size used in Aᵤ computation
 
         # store sources for finite difference estimations
         self.forward_sources = self.sim.sources
@@ -99,11 +101,11 @@ def __init__(
 
         self.gradient = []
 
-    def __call__(self, rho_vector=None, need_value=True, need_gradient=True):
+    def __call__(self, rho_vector=None, need_value=True, need_gradient=True, beta=None):
         """Evaluate value and/or gradient of objective function.
         """
         if rho_vector:
-            self.update_design(rho_vector=rho_vector)
+            self.update_design(rho_vector=rho_vector, beta=beta)
 
         # Run forward run if requested
         if need_value and self.current_state == "INIT":
@@ -265,6 +267,7 @@ def calculate_gradient(self):
                 self.D_a[ar][dri],
                 self.D_f[dri],
                 self.frequencies,
+                self.finite_difference_step
             ) for dri, dr in enumerate(self.design_regions)
         ] for ar in range(len(self.objective_functions))]
 

diff --git a/python/adjoint/utils.py b/python/adjoint/utils.py
@@ -18,6 +18,9 @@
 _PHI_AXIS = 3
 _Z_AXIS = 1
 
+# default finite difference step size when calculating Aᵤ
+FD_DEFAULT = 1e-3
+
 class DesignRegion(object):
     def __init__(
             self,
@@ -38,7 +41,7 @@ def update_design_parameters(self, design_parameters):
     def update_beta(self,beta):
         self.design_parameters.beta=beta
 
-    def get_gradient(self, sim, fields_a, fields_f, frequencies):
+    def get_gradient(self, sim, fields_a, fields_f, frequencies, finite_difference_step):
         num_freqs = onp.array(frequencies).size
         shapes = []
         '''We have the option to linear scale the gradients up front
@@ -82,7 +85,8 @@ def get_gradient(self, sim, fields_a, fields_f, frequencies):
         vol = sim._fit_volume_to_simulation(self.volume)
 
         # compute the gradient
-        mp._get_gradient(grad,scalegrad,fields_a,fields_f,sim.gv,vol.swigobj,onp.array(frequencies),sim.geps,shapes)
+        mp._get_gradient(grad,scalegrad,fields_a,fields_f,
+        sim.gv,vol.swigobj,onp.array(frequencies),sim.geps,shapes,finite_difference_step)
         return onp.squeeze(grad).T
 
 def _check_if_cylindrical(sim):
@@ -104,6 +108,7 @@ def calculate_vjps(
     adj_fields: List[List[onp.ndarray]],
     design_variable_shapes: List[Tuple[int, ...]],
     sum_freq_partials: bool = True,
+    finite_difference_step: float = FD_DEFAULT
 ) -> List[onp.ndarray]:
     """Calculates the VJP for a given set of forward and adjoint fields."""
     vjps = [
@@ -112,6 +117,7 @@ def calculate_vjps(
             adj_fields[i],
             fwd_fields[i],
             frequencies,
+            finite_difference_step,
         ) for i, design_region in enumerate(design_regions)
     ]
     if sum_freq_partials:

diff --git a/python/adjoint/wrapper.py b/python/adjoint/wrapper.py
@@ -103,6 +103,7 @@ def __init__(
         minimum_run_time: float = 0,
         maximum_run_time: float = onp.inf,
         until_after_sources: bool = True,
+        finite_difference_step: float = utils.FD_DEFAULT
     ):
         self.simulation = simulation
         self.sources = sources
@@ -113,6 +114,7 @@ def __init__(
         self.minimum_run_time = minimum_run_time
         self.maximum_run_time = maximum_run_time
         self.until_after_sources = until_after_sources
+        self.finite_difference_step = finite_difference_step
 
         self._simulate_fn = self._initialize_callable()
 
@@ -206,6 +208,7 @@ def _calculate_vjps(
             adj_fields,
             design_variable_shapes,
             sum_freq_partials=sum_freq_partials,
+            finite_difference_step=self.finite_difference_step
         )
 
     def _initialize_callable(

diff --git a/python/geom.py b/python/geom.py
@@ -529,7 +529,7 @@ class MaterialGrid(object):
     the `default_material` argument of the [`Simulation`](#Simulation) constructor (similar to a
     [material function](#medium)).
     """
-    def __init__(self,grid_size,medium1,medium2,weights=None,grid_type="U_DEFAULT",do_averaging=False,beta=0,eta=0.5,damping=0):
+    def __init__(self,grid_size,medium1,medium2,weights=None,grid_type="U_DEFAULT",do_averaging=True,beta=0,eta=0.5,damping=0):
         """
         Creates a `MaterialGrid` object.
 

diff --git a/python/meep.i b/python/meep.i
@@ -846,7 +846,7 @@ meep::volume_list *make_volume_list(const meep::volume &v, int c,
 %inline %{
 void _get_gradient(PyObject *grad, double scalegrad, PyObject *fields_a, PyObject *fields_f, 
     meep::grid_volume *grid_volume, meep::volume *where, PyObject *frequencies, 
-    meep_geom::geom_epsilon *geps, PyObject *fields_shapes) {
+    meep_geom::geom_epsilon *geps, PyObject *fields_shapes, double fd_step) {
     // clean the gradient array
     PyArrayObject *pao_grad = (PyArrayObject *)grad;
     if (!PyArray_Check(pao_grad)) meep::abort("grad parameter must be numpy array.");
@@ -884,7 +884,7 @@ void _get_gradient(PyObject *grad, double scalegrad, PyObject *fields_a, PyObjec
     if (PyArray_DIMS(pao_grad)[0] != nf) meep::abort("Numpy grad array is allocated for %td frequencies; it should be allocated for %td.",PyArray_DIMS(pao_grad)[0],nf);
 
     // calculate the gradient
-    meep_geom::material_grids_addgradient(grad_c,ng,fields_a_c,fields_f_c,fields_shapes_c,frequencies_c,scalegrad,*grid_volume,*where,geps);
+    meep_geom::material_grids_addgradient(grad_c,ng,fields_a_c,fields_f_c,fields_shapes_c,frequencies_c,scalegrad,*grid_volume,*where,geps,fd_step);
 }
 %}
 

diff --git a/python/tests/test_adjoint_solver.py b/python/tests/test_adjoint_solver.py
@@ -292,13 +292,17 @@ def test_adjoint_solver_DFT_fields(self):
             print("|Ez|^2 -- adjoint solver: {}, traditional simulation: {}".format(adjsol_obj,Ez2_unperturbed))
             self.assertClose(adjsol_obj,Ez2_unperturbed,epsilon=1e-6)
 
+            ## the DFT fields finite differences are more sensitive in single precision
+            deps_dft = 1e-4 if mp.is_single_precision() else 1e-5
+            dp_dft = deps_dft*rng.rand(Nx*Ny)
+
             ## compute perturbed Ez2
-            Ez2_perturbed = forward_simulation(p+dp, MonitorObject.DFT, frequencies)
+            Ez2_perturbed = forward_simulation(p+dp_dft, MonitorObject.DFT, frequencies)
 
             ## compare gradients
             if adjsol_grad.ndim < 2:
                 adjsol_grad = np.expand_dims(adjsol_grad,axis=1)
-            adj_scale = (dp[None,:]@adjsol_grad).flatten()
+            adj_scale = (dp_dft[None,:]@adjsol_grad).flatten()
             fd_grad = Ez2_perturbed-Ez2_unperturbed
             print("Directional derivative -- adjoint solver: {}, FD: {}".format(adj_scale,fd_grad))
             tol = 0.0461 if mp.is_single_precision() else 0.005

diff --git a/python/tests/test_material_grid.py b/python/tests/test_material_grid.py
@@ -27,6 +27,7 @@ def compute_transmittance(matgrid_symmetry=False):
                                   mp.air,
                                   mp.Medium(index=3.5),
                                   weights=weights,
+                                  do_averaging=False,
                                   grid_type='U_MEAN')
 
         geometry = [mp.Block(center=mp.Vector3(),

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
@@ -2620,8 +2620,10 @@ std::complex<double> get_material_gradient(
     std::complex<double> fields_f,    // forward field at current point
     double freq,                      // frequency
     geom_epsilon *geps,               // material
-    meep::grid_volume &gv,             // simulation grid volume
-    double du                         // step size
+    meep::grid_volume &gv,            // simulation grid volume
+    double du,                        // step size
+    double *u,                        // matgrid
+    int idx                           // matgrid index
 ) {
   /*Compute the Aᵤx product from the -λᵀAᵤx calculation.
   The current adjoint (λ) field component (adjoint_c)
@@ -2664,45 +2666,60 @@ std::complex<double> get_material_gradient(
       v.set_direction_max(d, r.in_direction(d) + 0.5*gv.inva*sd);
     }
     double row_1[3], row_2[3], dA_du[3];
-    geps->u_p = -du;
+    double orig = u[idx];
+    u[idx] -= du;
     geps->eff_chi1inv_row(adjoint_c, row_1, v, geps->tol, geps->maxeval);
-    geps->u_p = du;
+    u[idx] += 2*du;
     geps->eff_chi1inv_row(adjoint_c, row_2, v, geps->tol, geps->maxeval);
-    geps->u_p=0;
+    u[idx] = orig;
 
     for (int i=0;i<3;i++) dA_du[i] = (row_1[i] - row_2[i])/(2*du);
     return dA_du[dir_idx] * fields_f;
 
   }else{
+    double orig = u[idx];
     std::complex<double> row_1[3], row_2[3], dA_du[3];
-    geps->u_p = -du;
+    u[idx] -= du;
     eff_chi1inv_row_disp(adjoint_c,row_1,r,freq,geps);
-    geps->u_p = du;
+    u[idx] += 2*du;
     eff_chi1inv_row_disp(adjoint_c,row_2,r,freq,geps);
-    geps->u_p=0;
+    u[idx] = orig;
 
     for (int i=0;i<3;i++) dA_du[i] = (row_1[i] - row_2[i])/(2*du);
     return dA_du[dir_idx] * fields_f;
   }  
 }
 
-/* add the weights from linear_interpolate (see the linear_interpolate
-   function in fields.cpp) to data ... this has to be changed if
-   linear_interpolate is changed!! ...also multiply by scaleby
-   etc. for different gradient types */
-
+/* A brute force approach to calculating Aᵤ using finite differences.
+Past versions of the code only calculated dA/dε using a finite
+difference, and then multiplied by the analytic vJp (dε/du).
+With the addition of subpixel smoothing, however, the vJp became
+much more complicated and it is easier to calculate the entire gradient
+using finite differences (at the cost of slightly less accurate gradients 
+due to roundoff).
+ */
 void add_interpolate_weights(double rx, double ry, double rz,
                              double *data, int nx, int ny, int nz, int stride,
-                             double scaleby, const double *udata, int ukind, double uval) {
+                             double scaleby, double *udata, int ukind, double uval,
+                             meep::vec r, geom_epsilon *geps, 
+                             meep::component adjoint_c, meep::component forward_c,
+                                      std::complex<double> fwd, std::complex<double> adj,
+                                      double freq, meep::grid_volume &gv, double du
+                             ) {
   int x1, y1, z1, x2, y2, z2;
   double dx, dy, dz, u;
 
   meep::map_coordinates(rx, ry, rz, nx, ny, nz,
                         x1, y1, z1, x2, y2, z2,
                         dx, dy, dz);
+  int x_list[2] = {x1,x2}, y_list[2] = {y1,y2}, z_list[2] = {z1,z2};
+  int lx = (x1 == x2) ? 1 : 2;
+  int ly = (y1 == y2) ? 1 : 2;
+  int lz = (z1 == z2) ? 1 : 2;
 
 /* define a macro to give us data(x,y,z) on the grid,
 in row-major order (the order used by HDF5): */
+#define IDX(x,y,z) (((x)*ny + (y)) * nz + (z)) * stride
 #define D(x, y, z) (data[(((x)*ny + (y)) * nz + (z)) * stride])
 #define U(x, y, z) (udata[(((x)*ny + (y)) * nz + (z)) * stride])
 
@@ -2716,22 +2733,28 @@ in row-major order (the order used by HDF5): */
   if (ukind == material_data::U_MIN && u != uval) return; // TODO look into this
   if (ukind == material_data::U_PROD) scaleby *= uval / u;
 
-  D(x1, y1, z1) += (1.0 - dx) * (1.0 - dy) * (1.0 - dz) * scaleby;
-  D(x2, y1, z1) += dx * (1.0 - dy) * (1.0 - dz) * scaleby;
-  D(x1, y2, z1) += (1.0 - dx) * dy * (1.0 - dz) * scaleby;
-  D(x2, y2, z1) += dx * dy * (1.0 - dz) * scaleby;
-  D(x1, y1, z2) += (1.0 - dx) * (1.0 - dy) * dz * scaleby;
-  D(x2, y1, z2) += dx * (1.0 - dy) * dz * scaleby;
-  D(x1, y2, z2) += (1.0 - dx) * dy * dz * scaleby;
-  D(x2, y2, z2) += dx * dy * dz * scaleby;
+  for (int xi=0;xi<lx;xi++){
+    for (int yi=0;yi<ly;yi++){
+      for (int zi=0;zi<lz;zi++){
+        int x=x_list[xi], y=y_list[yi], z=z_list[zi];
+        int u_idx = IDX(x,y,z);
+        std::complex<double> prod = adj*get_material_gradient(
+          r,adjoint_c,forward_c,fwd,freq,geps,gv,du,udata,u_idx);
+        D(x, y, z) += prod.real() * scaleby;
+      }
+    }
+  }
 
+#undef IDX
 #undef D
 #undef U
 }
 
-void material_grids_addgradient_point(double *v,
-                                      vector3 p, double scalegrad,
-                                      geom_epsilon *geps) {
+void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, geom_epsilon *geps,
+                                      meep::component adjoint_c, meep::component forward_c,
+                                      std::complex<double> fwd, std::complex<double> adj,
+                                      double freq, meep::grid_volume &gv, double tol                                      
+                                      ) {
   geom_box_tree tp;
   int oi, ois;
   material_data *mg, *mg_sum;
@@ -2786,8 +2809,10 @@ void material_grids_addgradient_point(double *v,
       vector3 pb = to_geom_box_coords(p, &tp->objects[oi]);
       add_interpolate_weights(
           pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1,
-          scalegrad,
-          ucur, kind, uval);
+          scalegrad,ucur, kind, uval,
+          vector3_to_vec(p),geps,adjoint_c,forward_c,
+          fwd,adj,freq,gv,tol
+          );
       if (kind == material_data::U_DEFAULT) break;
       tp = geom_tree_search_next(p, tp, &oi);
     } while (tp && is_material_grid((material_data *)tp->objects[oi].o->material));
@@ -2800,8 +2825,9 @@ void material_grids_addgradient_point(double *v,
     double *ucur = mg->weights;
     uval = tanh_projection(material_grid_val(p, mg), mg->beta, mg->eta);
     add_interpolate_weights(p.x, p.y, p.z, vcur, sz.x, sz.y, sz.z, 1,
-                            scalegrad,
-                            ucur, kind, uval);
+                            scalegrad, ucur, kind, uval,
+                            vector3_to_vec(p),geps,adjoint_c,forward_c,
+                            fwd,adj,freq,gv,tol);
   }
 }
 
@@ -2841,7 +2867,7 @@ ptrdiff_t get_idx_from_ivec(meep::ndim dim, meep::ivec c1, ptrdiff_t *the_stride
 void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fields_a,
                                 std::complex<double> *fields_f, size_t fields_shapes[12], double *frequencies,
                                 double scalegrad, meep::grid_volume &gv,
-                                meep::volume &where, geom_epsilon *geps) {
+                                meep::volume &where, geom_epsilon *geps, double du) {
 
 // only loop over field components relevant to our simulation (in the proper order)
 #define FOR_MY_COMPONENTS(c1) FOR_ELECTRIC_COMPONENTS(c1) if (!coordinate_mismatch(gv.dim, component_direction(c1)))
@@ -2910,15 +2936,17 @@ void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fiel
           /* trivial case, no interpolation/restriction needed        */
           if (forward_c == adjoint_c){
             std::complex<double> fwd = GET_FIELDS(fields_f,ci_forward,f_i,idx_fields);
-            std::complex<double> prod = adj * get_material_gradient(p, adjoint_c, forward_c, fwd, frequencies[f_i], geps, gv, 1e-6);
             cyl_scale = (gv.dim == meep::Dcyl) ? 2*p.r() : 1; // the pi is already factored in near2far.cpp
-            material_grids_addgradient_point(v+ng*f_i, vec_to_vector3(p), scalegrad*prod.real()*cyl_scale, geps);
+            material_grids_addgradient_point(
+              v+ng*f_i, vec_to_vector3(p), scalegrad*cyl_scale, geps,
+              adjoint_c, forward_c, fwd, adj, frequencies[f_i], gv, du);
           /* more complicated case requires interpolation/restriction */
           }else{
             /* we need to restrict the adjoint fields to
-            the two nodes of interest, and interpolate
-            the forward fields to the same two nodes. Then
-            we perform our inner product at these nodes
+            the two nodes of interest (which requires a factor
+            of 0.5 to scale), and interpolate the forward fields 
+            to the same two nodes (which requires another factor of 0.5). 
+            Then we perform our inner product at these nodes.
             */            
             std::complex<double> fwd_avg, fwd1, fwd2, prod;
             ptrdiff_t fwd1_idx, fwd2_idx;
@@ -2945,8 +2973,9 @@ void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fiel
             fwd_avg = fwd1 + fwd2;
             meep::vec eps1 = gv[ieps1];
             cyl_scale = (gv.dim == meep::Dcyl) ? eps1.r() : 1;
-            prod = 0.5*adj*get_material_gradient(eps1, adjoint_c, forward_c, fwd_avg, frequencies[f_i], geps, gv, 1e-6);
-            material_grids_addgradient_point(v+ng*f_i, vec_to_vector3(eps1), scalegrad*prod.real()*cyl_scale, geps);
+            material_grids_addgradient_point(
+              v+ng*f_i, vec_to_vector3(eps1), scalegrad*cyl_scale, geps,
+              adjoint_c, forward_c, fwd_avg, 0.5*adj, frequencies[f_i], gv, du);
 
             //operate on the second eps node
             fwd1_idx = get_idx_from_ivec(gv.dim, start_ivec, the_stride, fwd_pa);
@@ -2956,8 +2985,9 @@ void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fiel
             fwd_avg = fwd1 + fwd2;
             meep::vec eps2 = gv[ieps2];
             cyl_scale = (gv.dim == meep::Dcyl) ? eps2.r() : 1;
-            prod = 0.5*adj*get_material_gradient(eps2, adjoint_c, forward_c, fwd_avg, frequencies[f_i], geps, gv, 1e-6);
-            material_grids_addgradient_point(v+ng*f_i, vec_to_vector3(eps2), scalegrad*prod.real()*cyl_scale, geps);
+            material_grids_addgradient_point(
+              v+ng*f_i, vec_to_vector3(eps2), scalegrad*cyl_scale, geps,
+              adjoint_c, forward_c, fwd_avg, 0.5*adj, frequencies[f_i], gv, du);
           }
           ci_forward++;
         }

diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
@@ -298,7 +298,7 @@ geom_box_tree calculate_tree(const meep::volume &v, geometric_object_list g);
 void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fields_a,
                                 std::complex<double> *fields_f, size_t fields_shapes[12],
                                 double *frequencies, double scalegrad,
-                                meep::grid_volume &gv, meep::volume &where, geom_epsilon *geps);
+                                meep::grid_volume &gv, meep::volume &where, geom_epsilon *geps,double du=1e-6);
 
 /***************************************************************/
 /* routines in GDSIIgeom.cc ************************************/