From 39d5a31f601327274c165606d65b9adb8901bd1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Mon, 9 Dec 2024 17:46:52 +0100
Subject: [PATCH 1/4] Skip incompatible FFT decompositions

---
 src/core/electrostatics/p3m.cpp  | 33 ++++++++++++++++++++++++++++++++
 src/core/p3m/TuningAlgorithm.cpp | 20 ++++++++++++++-----
 src/core/p3m/TuningAlgorithm.hpp |  6 ++++++
 3 files changed, 54 insertions(+), 5 deletions(-)
diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp
index 08cef7bd74..f8243bfab0 100644
--- a/src/core/electrostatics/p3m.cpp
+++ b/src/core/electrostatics/p3m.cpp
@@ -66,6 +66,7 @@
 #include <utils/integral_parameter.hpp>
 #include <utils/math/int_pow.hpp>
 #include <utils/math/sqr.hpp>
+#include <utils/serialization/array.hpp>
 
 #include <boost/mpi/collectives/all_reduce.hpp>
 #include <boost/mpi/collectives/broadcast.hpp>
@@ -632,6 +633,38 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
     return {};
   }
 
+  std::optional<std::string> fft_decomposition_veto(
+      Utils::Vector3i const &mesh_size_r_space) const override {
+#ifdef CUDA
+    if constexpr (Architecture == Arch::GPU) {
+      return std::nullopt;
+    }
+#endif
+    using Array3i = Utils::Array<int, 3u>;
+    auto const [KX, KY, KZ] = p3m.fft->get_permutations();
+    auto valid_decomposition = false;
+    Array3i mesh_size_k_space = {};
+    boost::mpi::reduce(
+        ::comm_cart, Array3i(p3m.mesh.stop), mesh_size_k_space,
+        [](Array3i const &lhs, Array3i const &rhs) {
+          return Array3i{{std::max(lhs[0u], rhs[0u]),
+                          std::max(lhs[1u], rhs[1u]),
+                          std::max(lhs[2u], rhs[2u])}};
+        },
+        0);
+    if (::this_node == 0) {
+      valid_decomposition = (mesh_size_r_space[0u] == mesh_size_k_space[KX] and
+                             mesh_size_r_space[1u] == mesh_size_k_space[KY] and
+                             mesh_size_r_space[2u] == mesh_size_k_space[KZ]);
+    }
+    boost::mpi::broadcast(::comm_cart, valid_decomposition, 0);
+    std::optional<std::string> retval{"conflict with FFT domain decomposition"};
+    if (valid_decomposition) {
+      retval = std::nullopt;
+    }
+    return retval;
+  }
+
   std::tuple<double, double, double, double>
   calculate_accuracy(Utils::Vector3i const &mesh, int cao,
                      double r_cut_iL) const override {
diff --git a/src/core/p3m/TuningAlgorithm.cpp b/src/core/p3m/TuningAlgorithm.cpp
index ab6ad80c9d..2fd542e4ec 100644
--- a/src/core/p3m/TuningAlgorithm.cpp
+++ b/src/core/p3m/TuningAlgorithm.cpp
@@ -49,6 +49,8 @@ static auto constexpr P3M_TUNE_CAO_TOO_LARGE = 1.;
 static auto constexpr P3M_TUNE_ELC_GAP_SIZE = 2.;
 /** could not achieve target accuracy */
 static auto constexpr P3M_TUNE_ACCURACY_TOO_LARGE = 3.;
+/** conflict with FFT domain decomposition */
+static auto constexpr P3M_TUNE_FFT_MESH_SIZE = 4.;
 /**@}*/
 
 /** @brief Precision threshold for a non-zero real-space cutoff. */
@@ -110,7 +112,7 @@ void TuningAlgorithm::commit(Utils::Vector3i const &mesh, int cao,
  * @param[in,out] tuned_accuracy  @copybrief P3MParameters::accuracy
  *
  * @returns The integration time in case of success, otherwise
- *          -@ref P3M_TUNE_ACCURACY_TOO_LARGE,
+ *          -@ref P3M_TUNE_ACCURACY_TOO_LARGE, -@ref P3M_TUNE_FFT_MESH_SIZE,
  *          -@ref P3M_TUNE_CAO_TOO_LARGE, or -@ref P3M_TUNE_ELC_GAP_SIZE
  */
 double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao,
@@ -171,17 +173,25 @@ double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao,
    * we know that the desired minimal accuracy is obtained */
   tuned_r_cut_iL = r_cut_iL = r_cut_iL_max;
 
+  auto const report_veto = [&](auto const &veto) {
+    if (veto) {
+      m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L,
+                         tuned_accuracy, rs_err, ks_err);
+    }
+    return static_cast<bool>(veto);
+  };
+
   /* if we are running P3M+ELC, check that r_cut is compatible */
   auto const r_cut = r_cut_iL * box_geo.length()[0];
-  auto const veto = layer_correction_veto_r_cut(r_cut);
-  if (veto) {
-    m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L,
-                       tuned_accuracy, rs_err, ks_err);
+  if (report_veto(layer_correction_veto_r_cut(r_cut))) {
     return -P3M_TUNE_ELC_GAP_SIZE;
   }
 
   commit(mesh, cao, r_cut_iL, tuned_alpha_L);
   on_solver_change();
+  if (report_veto(fft_decomposition_veto(mesh))) {
+    return -P3M_TUNE_FFT_MESH_SIZE;
+  }
   auto const int_time = benchmark_integration_step(m_system, m_timings);
 
   std::tie(tuned_accuracy, rs_err, ks_err, tuned_alpha_L) =
diff --git a/src/core/p3m/TuningAlgorithm.hpp b/src/core/p3m/TuningAlgorithm.hpp
index 0b4d87cdad..cd07355313 100644
--- a/src/core/p3m/TuningAlgorithm.hpp
+++ b/src/core/p3m/TuningAlgorithm.hpp
@@ -147,6 +147,12 @@ class TuningAlgorithm {
   virtual std::optional<std::string>
   layer_correction_veto_r_cut(double r_cut) const = 0;
 
+  /** @brief Veto FFT decomposition in non-cubic boxes. */
+  virtual std::optional<std::string>
+  fft_decomposition_veto(Utils::Vector3i const &) const {
+    return std::nullopt;
+  }
+
   /** @brief Write tuned parameters to the P3M parameter struct. */
   void commit(Utils::Vector3i const &mesh, int cao, double r_cut_iL,
               double alpha_L);

From 13c22b4788e9fd9b0c4c637bde79c8e0913f39a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Tue, 10 Dec 2024 19:27:44 +0100
Subject: [PATCH 2/4] Add allowed range for P3M mesh tuning

---
 src/core/electrostatics/p3m.cpp               | 19 +++++++--
 src/core/electrostatics/p3m.impl.hpp          |  6 ++-
 src/core/magnetostatics/dp3m.cpp              | 16 ++++++--
 src/core/magnetostatics/dp3m.impl.hpp         |  7 +++-
 .../EspressoSystemStandAlone_test.cpp         |  9 ++++-
 src/python/espressomd/electrostatics.py       |  8 ++++
 src/python/espressomd/magnetostatics.py       |  3 ++
 .../electrostatics/CoulombP3M.hpp             | 40 ++++++++++++++++++-
 .../magnetostatics/DipolarP3M.hpp             | 40 ++++++++++++++++++-
 testsuite/python/p3m_tuning_exceptions.py     | 30 ++++++++++++--
 testsuite/python/save_checkpoint.py           |  2 +
 testsuite/python/test_checkpoint.py           |  6 ++-
 12 files changed, 167 insertions(+), 19 deletions(-)

diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp
index f8243bfab0..0826e3871b 100644
--- a/src/core/electrostatics/p3m.cpp
+++ b/src/core/electrostatics/p3m.cpp
@@ -598,14 +598,17 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
   double m_mesh_density_min = -1., m_mesh_density_max = -1.;
   // indicates if mesh should be tuned
   bool m_tune_mesh = false;
+  std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
 
 protected:
   P3MParameters &get_params() override { return p3m.params; }
 
 public:
   CoulombTuningAlgorithm(System::System &system, auto &input_p3m,
-                         double prefactor, int timings)
-      : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m} {}
+                         double prefactor, int timings,
+                         decltype(m_tune_limits) tune_limits)
+      : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m},
+        m_tune_limits{std::move(tune_limits)} {}
 
   void on_solver_change() const override { m_system.on_coulomb_change(); }
 
@@ -722,6 +725,16 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
           max_npart_per_dim, std::cbrt(static_cast<double>(p3m.sum_qpart)));
       m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
       m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
+      if (m_tune_limits.first or m_tune_limits.second) {
+        auto const &box_l = box_geo.length();
+        auto const dim = std::max({box_l[0], box_l[1], box_l[2]});
+        if (m_tune_limits.first) {
+          m_mesh_density_min = static_cast<double>(*m_tune_limits.first) / dim;
+        }
+        if (m_tune_limits.second) {
+          m_mesh_density_max = static_cast<double>(*m_tune_limits.second) / dim;
+        }
+      }
       m_tune_mesh = true;
     } else {
       m_mesh_density_min = m_mesh_density_max = mesh_density;
@@ -805,7 +818,7 @@ void CoulombP3MImpl<FloatType, Architecture>::tune() {
     }
     try {
       CoulombTuningAlgorithm<FloatType, Architecture> parameters(
-          system, p3m, prefactor, tune_timings);
+          system, p3m, prefactor, tune_timings, tune_limits);
       parameters.setup_logger(tune_verbose);
       // parameter ranges
       parameters.determine_mesh_limits();
diff --git a/src/core/electrostatics/p3m.impl.hpp b/src/core/electrostatics/p3m.impl.hpp
index 8f8fb6d92f..de507df26e 100644
--- a/src/core/electrostatics/p3m.impl.hpp
+++ b/src/core/electrostatics/p3m.impl.hpp
@@ -36,6 +36,7 @@
 #include <utils/Vector.hpp>
 
 #include <memory>
+#include <optional>
 #include <stdexcept>
 #include <type_traits>
 #include <utility>
@@ -69,6 +70,7 @@ struct CoulombP3MImpl : public CoulombP3M {
   /** @brief Coulomb P3M meshes and FFT algorithm. */
   std::unique_ptr<p3m_data_struct_coulomb<FloatType>> p3m_impl;
   int tune_timings;
+  std::pair<std::optional<int>, std::optional<int>> tune_limits;
   bool tune_verbose;
   bool check_complex_residuals;
   bool m_is_tuned;
@@ -77,10 +79,10 @@ struct CoulombP3MImpl : public CoulombP3M {
   CoulombP3MImpl(
       std::unique_ptr<p3m_data_struct_coulomb<FloatType>> &&p3m_handle,
       double prefactor, int tune_timings, bool tune_verbose,
-      bool check_complex_residuals)
+      decltype(tune_limits) tune_limits, bool check_complex_residuals)
       : CoulombP3M(p3m_handle->params), p3m{*p3m_handle},
         p3m_impl{std::move(p3m_handle)}, tune_timings{tune_timings},
-        tune_verbose{tune_verbose},
+        tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose},
         check_complex_residuals{check_complex_residuals} {
 
     if (tune_timings <= 0) {
diff --git a/src/core/magnetostatics/dp3m.cpp b/src/core/magnetostatics/dp3m.cpp
index 8855a8e4d8..f59613943c 100644
--- a/src/core/magnetostatics/dp3m.cpp
+++ b/src/core/magnetostatics/dp3m.cpp
@@ -74,6 +74,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <tuple>
+#include <utility>
 #include <vector>
 
 #ifdef FFTW3_H
@@ -534,11 +535,14 @@ template <typename FloatType, Arch Architecture>
 class DipolarTuningAlgorithm : public TuningAlgorithm {
   p3m_data_struct_dipoles<FloatType> &dp3m;
   int m_mesh_max = -1, m_mesh_min = -1;
+  std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
 
 public:
   DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m,
-                         double prefactor, int timings)
-      : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m} {}
+                         double prefactor, int timings,
+                         decltype(m_tune_limits) tune_limits)
+      : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m},
+        m_tune_limits{std::move(tune_limits)} {}
 
   P3MParameters &get_params() override { return dp3m.params; }
 
@@ -603,6 +607,12 @@ class DipolarTuningAlgorithm : public TuningAlgorithm {
       m_mesh_min = static_cast<int>(std::round(std::pow(2., std::floor(expo))));
       /* avoid using more than 1 GB of FFT arrays */
       m_mesh_max = 128;
+      if (m_tune_limits.first) {
+        m_mesh_min = *m_tune_limits.first;
+      }
+      if (m_tune_limits.second) {
+        m_mesh_max = *m_tune_limits.second;
+      }
     } else {
       m_mesh_min = m_mesh_max = dp3m.params.mesh[0];
       m_logger->report_fixed_mesh(dp3m.params.mesh);
@@ -662,7 +672,7 @@ void DipolarP3MImpl<FloatType, Architecture>::tune() {
     }
     try {
       DipolarTuningAlgorithm<FloatType, Architecture> parameters(
-          system, dp3m, prefactor, tune_timings);
+          system, dp3m, prefactor, tune_timings, tune_limits);
       parameters.setup_logger(tune_verbose);
       // parameter ranges
       parameters.determine_mesh_limits();
diff --git a/src/core/magnetostatics/dp3m.impl.hpp b/src/core/magnetostatics/dp3m.impl.hpp
index ee50a2bd9d..406ca1fc3a 100644
--- a/src/core/magnetostatics/dp3m.impl.hpp
+++ b/src/core/magnetostatics/dp3m.impl.hpp
@@ -36,6 +36,7 @@
 #include <utils/Vector.hpp>
 
 #include <memory>
+#include <optional>
 #include <stdexcept>
 #include <type_traits>
 #include <utility>
@@ -72,16 +73,18 @@ struct DipolarP3MImpl : public DipolarP3M {
   /** @brief Coulomb P3M meshes and FFT algorithm. */
   std::unique_ptr<p3m_data_struct_dipoles<FloatType>> dp3m_impl;
   int tune_timings;
+  std::pair<std::optional<int>, std::optional<int>> tune_limits;
   bool tune_verbose;
   bool m_is_tuned;
 
 public:
   DipolarP3MImpl(
       std::unique_ptr<p3m_data_struct_dipoles<FloatType>> &&dp3m_handle,
-      double prefactor, int tune_timings, bool tune_verbose)
+      double prefactor, int tune_timings, bool tune_verbose,
+      decltype(tune_limits) tune_limits)
       : DipolarP3M(dp3m_handle->params), dp3m{*dp3m_handle},
         dp3m_impl{std::move(dp3m_handle)}, tune_timings{tune_timings},
-        tune_verbose{tune_verbose} {
+        tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose} {
 
     if (tune_timings <= 0) {
       throw std::domain_error("Parameter 'timings' must be > 0");
diff --git a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp
index 829aedbfc6..47d9a3d515 100644
--- a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp
+++ b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp
@@ -75,6 +75,7 @@ namespace utf = boost::unit_test;
 #include <initializer_list>
 #include <limits>
 #include <memory>
+#include <optional>
 #include <stdexcept>
 #include <unordered_map>
 #include <utility>
@@ -228,6 +229,8 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) {
 
     // set up P3M
     auto const prefactor = 2.;
+    auto const mesh_range = std::pair<std::optional<int>, std::optional<int>>{
+        std::nullopt, std::nullopt};
     auto p3m = P3MParameters{false,
                              0.0,
                              3.5,
@@ -238,7 +241,7 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) {
                              1e-3};
     auto solver =
         new_p3m_handle<double, Arch::CPU, FFTBackendLegacy, FFTBuffersLegacy>(
-            std::move(p3m), prefactor, 1, false, true);
+            std::move(p3m), prefactor, 1, false, mesh_range, true);
     add_actor(comm, espresso::system, system.coulomb.impl->solver, solver,
               [&system]() { system.on_coulomb_change(); });
     BOOST_CHECK(not solver->is_gpu());
@@ -297,6 +300,8 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) {
 
     // set up P3M
     auto const prefactor = 2.;
+    auto const mesh_range = std::pair<std::optional<int>, std::optional<int>>{
+        std::nullopt, std::nullopt};
     auto p3m = P3MParameters{false,
                              0.0,
                              3.5,
@@ -307,7 +312,7 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) {
                              1e-3};
     auto solver =
         new_dp3m_handle<double, Arch::CPU, FFTBackendLegacy, FFTBuffersLegacy>(
-            std::move(p3m), prefactor, 1, false);
+            std::move(p3m), prefactor, 1, false, mesh_range);
     add_actor(comm, espresso::system, system.dipoles.impl->solver, solver,
               [&system]() { system.on_dipoles_change(); });
     BOOST_CHECK(not solver->is_gpu());
diff --git a/src/python/espressomd/electrostatics.py b/src/python/espressomd/electrostatics.py
index 87b23f0090..d3544734a4 100644
--- a/src/python/espressomd/electrostatics.py
+++ b/src/python/espressomd/electrostatics.py
@@ -209,6 +209,10 @@ class P3M(_P3MBase):
     tune : :obj:`bool`, optional
         Used to activate/deactivate the tuning method on activation.
         Defaults to ``True``.
+    tune_limits : (2,) array_like of :obj:`int`, optional
+        Lower and upper limits (inclusive) for the mesh size during tuning,
+        along the largest box dimension. Use ``None`` to not impose a limit.
+        Defaults to ``[None, None]``.
     timings : :obj:`int`
         Number of force calculations during tuning.
     verbose : :obj:`bool`, optional
@@ -266,6 +270,10 @@ class P3MGPU(_P3MBase):
         Defaults to ``True``.
     timings : :obj:`int`
         Number of force calculations during tuning.
+    tune_limits : (2,) array_like of :obj:`int`, optional
+        Lower and upper limits (inclusive) for the mesh size during tuning,
+        along the largest box dimension. Use ``None`` to not impose a limit.
+        Defaults to ``[None, None]``.
     verbose : :obj:`bool`, optional
         If ``False``, disable log output during tuning.
     check_neutrality : :obj:`bool`, optional
diff --git a/src/python/espressomd/magnetostatics.py b/src/python/espressomd/magnetostatics.py
index 0132dd32a0..1884ea884c 100644
--- a/src/python/espressomd/magnetostatics.py
+++ b/src/python/espressomd/magnetostatics.py
@@ -104,6 +104,9 @@ class DipolarP3M(MagnetostaticInteraction):
     tune : :obj:`bool`, optional
         Activate/deactivate the tuning method on activation
         (default is ``True``, i.e., activated).
+    tune_limits : (2,) array_like of :obj:`int`, optional
+        Lower and upper limits (inclusive) for the mesh size during tuning.
+        Use ``None`` to not impose a limit. Defaults to ``[None, None]``.
     timings : :obj:`int`
         Number of force calculations during tuning.
     single_precision : :obj:`bool`
diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp
index 5e16440de9..a641a6f625 100644
--- a/src/script_interface/electrostatics/CoulombP3M.hpp
+++ b/src/script_interface/electrostatics/CoulombP3M.hpp
@@ -33,6 +33,7 @@
 #include "script_interface/get_value.hpp"
 
 #include <memory>
+#include <optional>
 #include <stdexcept>
 #include <string>
 #include <utility>
@@ -51,6 +52,7 @@ class CoulombP3M : public Actor<CoulombP3M<Architecture>, ::CoulombP3M> {
   bool m_tune_verbose;
   bool m_check_complex_residuals;
   bool m_single_precision;
+  std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
 
 public:
   using Base = Actor<CoulombP3M<Architecture>, ::CoulombP3M>;
@@ -93,6 +95,15 @@ class CoulombP3M : public Actor<CoulombP3M<Architecture>, ::CoulombP3M> {
          [this]() { return m_tune_verbose; }},
         {"timings", AutoParameter::read_only,
          [this]() { return m_tune_timings; }},
+        {"tune_limits", AutoParameter::read_only,
+         [this]() {
+           auto const &[range_min, range_max] = m_tune_limits;
+           std::vector<Variant> retval = {
+               range_min ? Variant{*range_min} : Variant{None{}},
+               range_max ? Variant{*range_max} : Variant{None{}},
+           };
+           return retval;
+         }},
         {"tune", AutoParameter::read_only, [this]() { return m_tune; }},
         {"check_complex_residuals", AutoParameter::read_only,
          [this]() { return m_check_complex_residuals; }},
@@ -103,6 +114,33 @@ class CoulombP3M : public Actor<CoulombP3M<Architecture>, ::CoulombP3M> {
     m_tune = get_value<bool>(params, "tune");
     m_tune_timings = get_value<int>(params, "timings");
     m_tune_verbose = get_value<bool>(params, "verbose");
+    m_tune_limits = {std::nullopt, std::nullopt};
+    if (params.contains("tune_limits")) {
+      std::vector<Variant> range;
+      try {
+        auto const val = get_value<std::vector<int>>(params, "tune_limits");
+        assert(val.size() == 2u);
+        range.emplace_back(val[0u]);
+        range.emplace_back(val[1u]);
+      } catch (...) {
+        range = get_value<std::vector<Variant>>(params, "tune_limits");
+        assert(range.size() == 2u);
+      }
+      if (not is_none(range[0u])) {
+        m_tune_limits.first = get_value<int>(range[0u]);
+      }
+      if (not is_none(range[1u])) {
+        m_tune_limits.second = get_value<int>(range[1u]);
+      }
+      context()->parallel_try_catch([&]() {
+        if (m_tune_limits.first and *m_tune_limits.first <= 0) {
+          throw std::domain_error("P3M mesh tuning limits: mesh must be > 0");
+        }
+        if (m_tune_limits.second and *m_tune_limits.second <= 0) {
+          throw std::domain_error("P3M mesh tuning limits: mesh must be > 0");
+        }
+      });
+    }
     m_check_complex_residuals =
         get_value<bool>(params, "check_complex_residuals");
     auto const single_precision = get_value<bool>(params, "single_precision");
@@ -121,7 +159,7 @@ class CoulombP3M : public Actor<CoulombP3M<Architecture>, ::CoulombP3M> {
                                get_value<double>(params, "accuracy")};
       make_handle(single_precision, std::move(p3m),
                   get_value<double>(params, "prefactor"), m_tune_timings,
-                  m_tune_verbose, m_check_complex_residuals);
+                  m_tune_verbose, m_tune_limits, m_check_complex_residuals);
     });
     set_charge_neutrality_tolerance(params);
   }
diff --git a/src/script_interface/magnetostatics/DipolarP3M.hpp b/src/script_interface/magnetostatics/DipolarP3M.hpp
index 67a2c2205d..8559171a27 100644
--- a/src/script_interface/magnetostatics/DipolarP3M.hpp
+++ b/src/script_interface/magnetostatics/DipolarP3M.hpp
@@ -33,6 +33,7 @@
 #include "script_interface/get_value.hpp"
 
 #include <memory>
+#include <optional>
 #include <stdexcept>
 #include <string>
 #include <utility>
@@ -47,6 +48,7 @@ namespace Dipoles {
 template <Arch Architecture>
 class DipolarP3M : public Actor<DipolarP3M<Architecture>, ::DipolarP3M> {
   int m_tune_timings;
+  std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
   bool m_tune;
   bool m_tune_verbose;
 
@@ -90,6 +92,15 @@ class DipolarP3M : public Actor<DipolarP3M<Architecture>, ::DipolarP3M> {
          [this]() { return m_tune_verbose; }},
         {"timings", AutoParameter::read_only,
          [this]() { return m_tune_timings; }},
+        {"tune_limits", AutoParameter::read_only,
+         [this]() {
+           auto const &[range_min, range_max] = m_tune_limits;
+           std::vector<Variant> retval = {
+               range_min ? Variant{*range_min} : Variant{None{}},
+               range_max ? Variant{*range_max} : Variant{None{}},
+           };
+           return retval;
+         }},
         {"tune", AutoParameter::read_only, [this]() { return m_tune; }},
     });
   }
@@ -98,6 +109,33 @@ class DipolarP3M : public Actor<DipolarP3M<Architecture>, ::DipolarP3M> {
     m_tune = get_value<bool>(params, "tune");
     m_tune_timings = get_value<int>(params, "timings");
     m_tune_verbose = get_value<bool>(params, "verbose");
+    m_tune_limits = {std::nullopt, std::nullopt};
+    if (params.contains("tune_limits")) {
+      std::vector<Variant> range;
+      try {
+        auto const val = get_value<std::vector<int>>(params, "tune_limits");
+        assert(val.size() == 2u);
+        range.emplace_back(val[0u]);
+        range.emplace_back(val[1u]);
+      } catch (...) {
+        range = get_value<std::vector<Variant>>(params, "tune_limits");
+        assert(range.size() == 2u);
+      }
+      if (not is_none(range[0u])) {
+        m_tune_limits.first = get_value<int>(range[0u]);
+      }
+      if (not is_none(range[1u])) {
+        m_tune_limits.second = get_value<int>(range[1u]);
+      }
+      context()->parallel_try_catch([&]() {
+        if (m_tune_limits.first and *m_tune_limits.first <= 0) {
+          throw std::domain_error("P3M mesh tuning limits: mesh must be > 0");
+        }
+        if (m_tune_limits.second and *m_tune_limits.second <= 0) {
+          throw std::domain_error("P3M mesh tuning limits: mesh must be > 0");
+        }
+      });
+    }
     auto const single_precision = get_value<bool>(params, "single_precision");
     static_assert(Architecture == Arch::CPU, "GPU not implemented");
     context()->parallel_try_catch([&]() {
@@ -111,7 +149,7 @@ class DipolarP3M : public Actor<DipolarP3M<Architecture>, ::DipolarP3M> {
                                get_value<double>(params, "accuracy")};
       make_handle(single_precision, std::move(p3m),
                   get_value<double>(params, "prefactor"), m_tune_timings,
-                  m_tune_verbose);
+                  m_tune_verbose, m_tune_limits);
     });
   }
 
diff --git a/testsuite/python/p3m_tuning_exceptions.py b/testsuite/python/p3m_tuning_exceptions.py
index ac31c7ddca..8ccfa6f569 100644
--- a/testsuite/python/p3m_tuning_exceptions.py
+++ b/testsuite/python/p3m_tuning_exceptions.py
@@ -205,6 +205,8 @@ def check_invalid_params(self, container, class_solver, **custom_params):
             ('alpha', -2.0, "Parameter 'alpha' must be > 0"),
             ('accuracy', -2.0, "Parameter 'accuracy' must be > 0"),
             ('mesh', (-1, -1, -1), "Parameter 'mesh' must be > 0"),
+            ('tune_limits', (-1, 1), "P3M mesh tuning limits: mesh must be > 0"),
+            ('tune_limits', (1, 0), "P3M mesh tuning limits: mesh must be > 0"),
             ('mesh', (2, 2, 2), "Parameter 'cao' cannot be larger than 'mesh'"),
             ('mesh_off', (-2, 1, 1), "Parameter 'mesh_off' must be >= 0 and <= 1"),
         ]
@@ -421,9 +423,20 @@ def test_09_no_errors_p3m_cpu(self):
         # tuning with cao or r_cut or mesh constrained, or without constraints
         for key, value in valid_params.items():
             solver = espressomd.electrostatics.P3M(
-                prefactor=2, accuracy=1e-2, epsilon=0.0, **{key: value})
+                prefactor=2, accuracy=1e-2, epsilon=0.0,
+                tune_limits=[2, 20], **{key: value})
             self.system.electrostatics.solver = solver
             self.system.electrostatics.solver = None
+        # tuning with mesh range constraint
+        for lower_limit in [None, 6]:
+            solver = espressomd.electrostatics.P3M(
+                prefactor=2, accuracy=1e-2, epsilon=0.0,
+                tune_limits=[lower_limit, 8], **{key: value})
+            self.system.electrostatics.solver = solver
+            if lower_limit is not None:
+                for i in range(3):
+                    self.assertIn(solver.mesh[i], [6, 8])
+            self.system.electrostatics.solver = None
 
     @utx.skipIfMissingFeatures("DP3M")
     def test_09_no_errors_dp3m_cpu(self):
@@ -437,9 +450,20 @@ def test_09_no_errors_dp3m_cpu(self):
         # tuning with cao or r_cut or mesh constrained, or without constraints
         for key, value in valid_params.items():
             solver = espressomd.magnetostatics.DipolarP3M(
-                prefactor=2, accuracy=1e-2, **{key: value})
+                prefactor=2, accuracy=1e-2,
+                tune_limits=[3, 17], **{key: value})
             self.system.magnetostatics.solver = solver
-            self.system.magnetostatics.clear()
+            self.system.magnetostatics.solver = None
+        # tuning with mesh range constraint
+        for lower_limit in [None, 3]:
+            solver = espressomd.magnetostatics.DipolarP3M(
+                prefactor=2, accuracy=1e-2,
+                tune_limits=[lower_limit, 5], **{key: value})
+            self.system.magnetostatics.solver = solver
+            if lower_limit is not None:
+                for i in range(3):
+                    self.assertIn(solver.mesh[i], [3, 5])
+            self.system.magnetostatics.solver = None
 
     @utx.skipIfMissingFeatures("P3M")
     def test_09_no_errors_p3m_cpu_rescale_mesh(self):
diff --git a/testsuite/python/save_checkpoint.py b/testsuite/python/save_checkpoint.py
index 28c5950a3c..504ec63546 100644
--- a/testsuite/python/save_checkpoint.py
+++ b/testsuite/python/save_checkpoint.py
@@ -154,6 +154,7 @@
         r_cut=1.0,
         check_complex_residuals=False,
         timings=15,
+        tune_limits=[8, 12],
         tune=False)
     if 'ELC' in modes:
         elc = espressomd.electrostatics.ELC(
@@ -350,6 +351,7 @@
         accuracy=0.01,
         single_precision=True,
         timings=15,
+        tune_limits=[11, 15],
         tune=False)
     system.magnetostatics.solver = dp3m
 
diff --git a/testsuite/python/test_checkpoint.py b/testsuite/python/test_checkpoint.py
index ed31ebc8d6..f2193a9c7c 100644
--- a/testsuite/python/test_checkpoint.py
+++ b/testsuite/python/test_checkpoint.py
@@ -835,8 +835,8 @@ def test_dp3m(self):
         state = actor.get_params()
         reference = {'prefactor': 1.0, 'accuracy': 0.01, 'mesh': 3 * [8],
                      'cao': 1, 'alpha': 12.0, 'r_cut': 2.4, 'tune': False,
-                     'mesh_off': [0.5, 0.5, 0.5], 'epsilon': 2.0,
-                     'timings': 15, 'single_precision': True}
+                     'mesh_off': [0.5, 0.5, 0.5], 'epsilon': 2.0, 'timings': 15,
+                     'tune_limits': [11, 15], 'single_precision': True}
         for key in reference:
             self.assertIn(key, state)
             np.testing.assert_almost_equal(state[key], reference[key],
@@ -852,6 +852,7 @@ def test_p3m(self):
         reference = {'prefactor': 1.0, 'accuracy': 0.1, 'mesh': 3 * [10],
                      'cao': 1, 'alpha': 1.0, 'r_cut': 1.0, 'tune': False,
                      'timings': 15, 'check_neutrality': True,
+                     'tune_limits': [8, 12],
                      'single_precision': single_precision,
                      'check_complex_residuals': False,
                      'charge_neutrality_tolerance': 1e-12}
@@ -870,6 +871,7 @@ def test_elc(self):
         p3m_reference = {'prefactor': 1.0, 'accuracy': 0.1, 'mesh': 3 * [10],
                          'cao': 1, 'alpha': 1.0, 'r_cut': 1.0, 'tune': False,
                          'timings': 15, 'check_neutrality': True,
+                         'tune_limits': [8, 12],
                          'check_complex_residuals': False,
                          'charge_neutrality_tolerance': 7e-12}
         elc_reference = {'gap_size': 6.0, 'maxPWerror': 0.1,

From f7f164e5db209d153823e739902d95c5e2c5b30c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Wed, 11 Dec 2024 17:33:30 +0100
Subject: [PATCH 3/4] Refactor P3M code

Use the same variable names in P3M and DP3M influence functions,
move their Brillouin limit to a template parameter, and improve
their documentation. Avoid division by zero in ELC with constant
potential.
---
 doc/doxygen/Doxyfile.in                     |  2 +-
 src/core/electrostatics/elc.cpp             |  8 +-
 src/core/electrostatics/p3m.cpp             |  4 +-
 src/core/magnetostatics/dp3m.cpp            |  9 +-
 src/core/p3m/common.hpp                     | 11 ++-
 src/core/p3m/data_struct.hpp                |  2 +-
 src/core/p3m/influence_function.hpp         | 73 ++++++++++-----
 src/core/p3m/influence_function_dipolar.hpp | 98 +++++++++++++--------
 src/core/unit_tests/p3m_test.cpp            |  4 +-
 9 files changed, 131 insertions(+), 80 deletions(-)

diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in
index 67333f21af..b6c0a0e1b2 100644
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@@ -1511,7 +1511,7 @@ MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/
 # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
-MATHJAX_EXTENSIONS     =
+MATHJAX_EXTENSIONS     = TeX/AMSmath
 
 # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
 # of code that will be used on startup of the MathJax code. See the MathJax site
diff --git a/src/core/electrostatics/elc.cpp b/src/core/electrostatics/elc.cpp
index a7988e84db..4e28654ae1 100644
--- a/src/core/electrostatics/elc.cpp
+++ b/src/core/electrostatics/elc.cpp
@@ -368,10 +368,6 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const {
   auto const &box_geo = *get_system().box_geo;
   auto const xy_area_inv = box_geo.length_inv()[0] * box_geo.length_inv()[1];
   auto const pref = prefactor * 2. * std::numbers::pi * xy_area_inv;
-  auto const delta = elc.delta_mid_top * elc.delta_mid_bot;
-  auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta);
-  auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta);
-  auto const fac_delta = delta / (1. - delta);
 
   /* for non-neutral systems, this shift gives the background contribution
    * (rsp. for this shift, the DM of the background is zero) */
@@ -397,6 +393,10 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const {
       }
     } else {
       // metallic boundaries
+      auto const delta = elc.delta_mid_top * elc.delta_mid_bot;
+      auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta);
+      auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta);
+      auto const fac_delta = delta / (1. - delta);
       clear_vec(gblcblk, size);
       auto const h = elc.box_h;
       ImageSum const image_sum{delta, shift, lz};
diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp
index 0826e3871b..99edb97c4c 100644
--- a/src/core/electrostatics/p3m.cpp
+++ b/src/core/electrostatics/p3m.cpp
@@ -127,7 +127,7 @@ void CoulombP3MImpl<FloatType, Architecture>::count_charged_particles() {
 template <typename FloatType, Arch Architecture>
 void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
   auto const [KX, KY, KZ] = p3m.fft->get_permutations();
-  p3m.g_force = grid_influence_function<FloatType, 1>(
+  p3m.g_force = grid_influence_function<FloatType, 1, P3M_BRILLOUIN>(
       p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
       get_system().box_geo->length_inv());
 }
@@ -138,7 +138,7 @@ void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
 template <typename FloatType, Arch Architecture>
 void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_energy() {
   auto const [KX, KY, KZ] = p3m.fft->get_permutations();
-  p3m.g_energy = grid_influence_function<FloatType, 0>(
+  p3m.g_energy = grid_influence_function<FloatType, 0, P3M_BRILLOUIN>(
       p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
       get_system().box_geo->length_inv());
 }
diff --git a/src/core/magnetostatics/dp3m.cpp b/src/core/magnetostatics/dp3m.cpp
index f59613943c..1310ff24a7 100644
--- a/src/core/magnetostatics/dp3m.cpp
+++ b/src/core/magnetostatics/dp3m.cpp
@@ -116,8 +116,9 @@ double
 DipolarP3MImpl<FloatType, Architecture>::calc_average_self_energy_k_space()
     const {
   auto const &box_geo = *get_system().box_geo;
-  auto const node_phi = grid_influence_function_self_energy(
-      dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
+  auto const node_phi =
+      grid_influence_function_self_energy<FloatType, P3M_BRILLOUIN>(
+          dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
 
   double phi = 0.;
   boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0);
@@ -519,14 +520,14 @@ double DipolarP3MImpl<FloatType, Architecture>::calc_surface_term(
 
 template <typename FloatType, Arch Architecture>
 void DipolarP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
-  dp3m.g_force = grid_influence_function<FloatType, 3>(
+  dp3m.g_force = grid_influence_function<FloatType, 3, P3M_BRILLOUIN>(
       dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
       get_system().box_geo->length_inv());
 }
 
 template <typename FloatType, Arch Architecture>
 void DipolarP3MImpl<FloatType, Architecture>::calc_influence_function_energy() {
-  dp3m.g_energy = grid_influence_function<FloatType, 2>(
+  dp3m.g_energy = grid_influence_function<FloatType, 2, P3M_BRILLOUIN>(
       dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
       get_system().box_geo->length_inv());
 }
diff --git a/src/core/p3m/common.hpp b/src/core/p3m/common.hpp
index 3cb9c1cb45..1f0a0a1739 100644
--- a/src/core/p3m/common.hpp
+++ b/src/core/p3m/common.hpp
@@ -53,6 +53,7 @@ auto constexpr P3M_EPSILON_METALLIC = 0.0;
 #include <span>
 #include <stdexcept>
 
+/** @brief P3M kernel architecture. */
 enum class Arch { CPU, GPU };
 
 /** @brief Structure to hold P3M parameters and some dependent variables. */
@@ -65,8 +66,8 @@ struct P3MParameters {
   /** cutoff radius for real space electrostatics (>0), rescaled to
    *  @p r_cut_iL = @p r_cut * @p box_l_i. */
   double r_cut_iL;
-  /** number of mesh points per coordinate direction (>0). */
-  Utils::Vector3i mesh = {};
+  /** number of mesh points per coordinate direction (>0), in real space. */
+  Utils::Vector3i mesh;
   /** offset of the first mesh point (lower left corner) from the
    *  coordinate origin ([0,1[). */
   Utils::Vector3d mesh_off;
@@ -237,15 +238,14 @@ template <typename FloatType> struct P3MFFTMesh {
 
 #endif // defined(P3M) or defined(DP3M)
 
-namespace detail {
-/** Calculate indices that shift @ref P3MParameters::mesh "mesh" by `mesh/2`.
+/** @brief Calculate indices that shift @ref P3MParameters::mesh by `mesh/2`.
  *  For each mesh size @f$ n @f$ in @c mesh_size, create a sequence of integer
  *  values @f$ \left( 0, \ldots, \lfloor n/2 \rfloor, -\lfloor n/2 \rfloor,
  *  \ldots, -1\right) @f$ if @c zero_out_midpoint is false, otherwise
  *  @f$ \left( 0, \ldots, \lfloor n/2 - 1 \rfloor, 0, -\lfloor n/2 \rfloor,
  *  \ldots, -1\right) @f$.
  */
-std::array<std::vector<int>, 3> inline calc_meshift(
+std::array<std::vector<int>, 3> inline calc_p3m_mesh_shift(
     Utils::Vector3i const &mesh_size, bool zero_out_midpoint = false) {
   std::array<std::vector<int>, 3> ret{};
 
@@ -262,4 +262,3 @@ std::array<std::vector<int>, 3> inline calc_meshift(
 
   return ret;
 }
-} // namespace detail
diff --git a/src/core/p3m/data_struct.hpp b/src/core/p3m/data_struct.hpp
index 9bd81ad7c2..35e1cffc2f 100644
--- a/src/core/p3m/data_struct.hpp
+++ b/src/core/p3m/data_struct.hpp
@@ -66,7 +66,7 @@ template <typename FloatType> struct p3m_data_struct {
    *  i.e. the prefactor @f$ 2i\pi/L @f$ is missing!
    */
   void calc_differential_operator() {
-    d_op = detail::calc_meshift(params.mesh, true);
+    d_op = calc_p3m_mesh_shift(params.mesh, true);
   }
 
   /** @brief Force optimised influence function (k-space) */
diff --git a/src/core/p3m/influence_function.hpp b/src/core/p3m/influence_function.hpp
index c3373a88d1..c3487578cb 100644
--- a/src/core/p3m/influence_function.hpp
+++ b/src/core/p3m/influence_function.hpp
@@ -19,6 +19,10 @@
 
 #pragma once
 
+#include "config/config.hpp"
+
+#if defined(P3M)
+
 #include "p3m/common.hpp"
 #include "p3m/for_each_3d.hpp"
 #include "p3m/math.hpp"
@@ -36,34 +40,54 @@
 #include <vector>
 
 /**
- * @brief Hockney/Eastwood/Ballenegger optimal influence function.
+ * @brief Calculate the aliasing sums for the optimal influence function.
  *
  * This implements Eq. 30 of @cite cerda08d, which can be used
  * for monopole and dipole P3M by choosing the appropriate S factor.
  *
- * @tparam S Order of the differential operator, e.g. 0 for potential,
- *          1 for electric field, ...
- * @tparam m Number of aliasing terms to take into account.
+ * The full equation is:
+ * @f{align*}{
+ * G_{\text{opt}}(\vec{n}, S, \text{cao}) &=
+ *   \displaystyle\frac{4\pi}{\sum_\vec{m} U^2}
+ *   \sum_\vec{m}U^2
+ *   \displaystyle\frac{\left(\vec{k}\odot\vec{k}_m\right)^S}
+ *                     {|\vec{k}_m|^2}
+ *   e^{-1/(2\alpha)^2|\vec{k}_m|^2} \\
+ *
+ * U &= \operatorname{det}\left[I_3\cdot\operatorname{sinc}\left(
+ *          \frac{\vec{k}_m\odot\vec{a}}{2\pi}\right)\right]^{\text{cao}} \\
+ *
+ * \vec{k} &= \frac{2\pi}{\vec{N}\odot\vec{a}}\vec{s}[\vec{n}] \\
  *
- * @param cao Charge assignment order.
- * @param alpha Ewald splitting parameter.
- * @param k k-vector to evaluate the function for.
- * @param h Grid spacing.
+ * \vec{k}_m &= \frac{2\pi}{\vec{N}\odot\vec{a}}
+ *              \left(\vec{s}[\vec{n}]+\vec{m}\odot\vec{N}\right)
+ * @f}
+ *
+ * with @f$ I_3 @f$ the 3x3 identity matrix, @f$ \vec{N} @f$ the global mesh
+ * size in k-space coordinates, @f$ \vec{a} @f$ the mesh size, @f$ \vec{m} @f$
+ * the Brillouin zone coordinates, @f$ \odot @f$ the Hadamard product.
+ *
+ * @tparam S Order of the differential operator (0 for potential, 1 for E-field)
+ * @tparam m Number of Brillouin zones that contribute to the aliasing sums
+ *
+ * @param params   P3M parameters
+ * @param k        k-vector to evaluate the function for.
+ * @return The optimal influence function for a specific k-vector.
  */
 template <std::size_t S, std::size_t m>
-double G_opt(int cao, double alpha, Utils::Vector3d const &k,
-             Utils::Vector3d const &h) {
+double G_opt(P3MParameters const &params, Utils::Vector3d const &k) {
 
   auto const k2 = k.norm2();
   if (k2 == 0.) {
     return 0.;
   }
 
-  auto constexpr limit = 30.;
+  auto constexpr exp_limit = 30.;
   auto constexpr m_start = Utils::Vector3i::broadcast(-m);
   auto constexpr m_stop = Utils::Vector3i::broadcast(m + 1);
-  auto const exponent_prefactor = Utils::sqr(1. / (2. * alpha));
-  auto const wavevector = (2. * std::numbers::pi) / h;
+  auto const cao = params.cao;
+  auto const exponent_prefactor = Utils::sqr(1. / (2. * params.alpha));
+  auto const wavevector = (2. * std::numbers::pi) / params.a;
   auto const wavevector_i = 1. / wavevector;
   auto indices = Utils::Vector3i{};
   auto km = Utils::Vector3d{};
@@ -76,9 +100,9 @@ double G_opt(int cao, double alpha, Utils::Vector3d const &k,
       [&]() {
         auto const U2 = std::pow(Utils::product(fnm), 2 * cao);
         auto const km2 = km.norm2();
-        auto const exponent = exponent_prefactor * km2;
-        if (exponent < limit) {
-          auto const f3 = std::exp(-exponent) * (4. * std::numbers::pi / km2);
+        auto const exp_term = exponent_prefactor * km2;
+        if (exp_term < exp_limit) {
+          auto const f3 = std::exp(-exp_term) * (4. * std::numbers::pi / km2);
           numerator += U2 * f3 * Utils::int_pow<S>(k * km);
         }
         denominator += U2;
@@ -97,26 +121,25 @@ double G_opt(int cao, double alpha, Utils::Vector3d const &k,
  * This evaluates the optimal influence function @ref G_opt
  * over a regular grid of k vectors, and returns the values as a vector.
  *
- * @tparam S Order of the differential operator, e.g. 0 for potential,
- *          1 for electric field...
- * @tparam m Number of aliasing terms to take into account.
+ * @tparam S Order of the differential operator (0 for potential, 1 for E-field)
+ * @tparam m Number of Brillouin zones that contribute to the aliasing sums
  *
  * @param params P3M parameters.
- * @param n_start Lower left corner of the grid.
- * @param n_stop Upper right corner of the grid.
+ * @param n_start Lower left corner of the grid in k-space.
+ * @param n_stop Upper right corner of the grid in k-space.
  * @param KX k-space x-axis index.
  * @param KY k-space y-axis index.
  * @param KZ k-space z-axis index.
  * @param inv_box_l Inverse box length.
  * @return Values of G_opt at regular grid points.
  */
-template <typename FloatType, std::size_t S, std::size_t m = 0>
+template <typename FloatType, std::size_t S, std::size_t m>
 std::vector<FloatType> grid_influence_function(
     P3MParameters const &params, Utils::Vector3i const &n_start,
     Utils::Vector3i const &n_stop, int const KX, int const KY, int const KZ,
     Utils::Vector3d const &inv_box_l) {
 
-  auto const shifts = detail::calc_meshift(params.mesh);
+  auto const shifts = calc_p3m_mesh_shift(params.mesh);
   auto const size = n_stop - n_start;
 
   /* The influence function grid */
@@ -141,10 +164,12 @@ std::vector<FloatType> grid_influence_function(
           Utils::Vector3d{{shifts[0u][indices[KX]] * wavevector[0u],
                            shifts[1u][indices[KY]] * wavevector[1u],
                            shifts[2u][indices[KZ]] * wavevector[2u]}};
-      g[index] = FloatType(G_opt<S, m>(params.cao, params.alpha, k, params.a));
+      g[index] = FloatType(G_opt<S, m>(params, k));
     }
     ++index;
   });
 
   return g;
 }
+
+#endif // defined(P3M)
diff --git a/src/core/p3m/influence_function_dipolar.hpp b/src/core/p3m/influence_function_dipolar.hpp
index 74af200a20..d08a807069 100644
--- a/src/core/p3m/influence_function_dipolar.hpp
+++ b/src/core/p3m/influence_function_dipolar.hpp
@@ -39,31 +39,54 @@
 #include <numbers>
 #include <vector>
 
-/** Calculate the aliasing sums for the optimal influence function.
+/**
+ * @brief Calculate the aliasing sums for the optimal influence function.
+ *
+ * Evaluate the aliasing sums in the numerator and denominator of
+ * the expression for the optimal influence function, see
+ * @cite hockney88a eq (8-22), p. 275.
+ *
+ * The full equation is:
+ * @f{align*}{
+ * G_{\text{opt}}(\vec{n}, S, \text{cao}) &=
+ *   \displaystyle\frac{1}{\sum_\vec{m} U^2}
+ *   \sum_\vec{m}U^2
+ *   \displaystyle\frac{\left(\vec{d}_{\text{op}}[\vec{n}](\vec{s}[\vec{n}] +
+ *                            \vec{m}\odot\vec{N})\right)^S}
+ *                     {\left(\vec{s}[\vec{n}] + \vec{m}\odot\vec{N}\right)^2}
+ *   e^{-\pi^2\left(\alpha\vec{N}\odot\vec{a}\right)^{-2}\left(\vec{s}[\vec{n}]
+ *      +\vec{m}\odot\vec{N}\right)^2} \\
  *
- *  Calculates the aliasing sums in the numerator and denominator of
- *  the expression for the optimal influence function (see
- *  @cite hockney88a : 8-22, p. 275).
+ * U &= \operatorname{det}\left[I_3\cdot\operatorname{sinc}\left(
+ *         \vec{s}[\vec{n}]\oslash{\vec{N}}+\vec{m}\odot\vec{N}\right)
+ *      \right]^{\text{cao}}
+ * @f}
  *
- *  \tparam S          order (2 for energy, 3 for forces)
- *  \param params      DP3M parameters
- *  \param shift       shift for a given n-vector
- *  \param d_op        differential operator for a given n-vector
- *  \return The result of the fraction.
+ * with @f$ I_3 @f$ the 3x3 identity matrix, @f$ \vec{N} @f$ the global mesh
+ * size in k-space coordinates, @f$ \vec{a} @f$ the mesh size, @f$ \vec{m} @f$
+ * the Brillouin zone coordinates, @f$ \odot @f$ the Hadamard product,
+ * @f$ \oslash @f$ the Hadamard division.
+ *
+ * @tparam S Order of the differential operator (2 for energy, 3 for forces)
+ * @tparam m Number of Brillouin zones that contribute to the aliasing sums
+ *
+ * @param params      DP3M parameters
+ * @param shift       shifting integer vector for a specific k-vector
+ * @param d_op        differential operator for a specific k-vector
+ * @return The optimal influence function for a specific k-vector.
  */
-template <std::size_t S>
+template <std::size_t S, std::size_t m>
 double G_opt_dipolar(P3MParameters const &params, Utils::Vector3i const &shift,
                      Utils::Vector3i const &d_op) {
 
-  auto constexpr limit = P3M_BRILLOUIN;
   auto constexpr exp_limit = 30.;
-  auto constexpr m_start = Utils::Vector3i::broadcast(-limit);
-  auto constexpr m_stop = Utils::Vector3i::broadcast(limit + 1);
+  auto constexpr m_start = Utils::Vector3i::broadcast(-m);
+  auto constexpr m_stop = Utils::Vector3i::broadcast(m + 1);
   auto const cao = params.cao;
   auto const mesh = params.mesh[0];
   auto const offset =
       static_cast<Utils::Vector3d>(shift) / static_cast<double>(mesh);
-  auto const f2 = Utils::sqr(std::numbers::pi / params.alpha_L);
+  auto const exp_prefactor = Utils::sqr(std::numbers::pi / params.alpha_L);
   auto indices = Utils::Vector3i{};
   auto nm = Utils::Vector3i{};
   auto fnm = Utils::Vector3d{};
@@ -73,14 +96,14 @@ double G_opt_dipolar(P3MParameters const &params, Utils::Vector3i const &shift,
   for_each_3d(
       m_start, m_stop, indices,
       [&]() {
-        auto const norm_sq = nm.norm2();
-        auto const sz = std::pow(Utils::product(fnm), 2 * cao);
-        auto const exp_term = f2 * norm_sq;
+        auto const U2 = std::pow(Utils::product(fnm), 2 * cao);
+        auto const nm2 = nm.norm2();
+        auto const exp_term = exp_prefactor * nm2;
         if (exp_term < exp_limit) {
-          auto const f3 = sz * std::exp(-exp_term) / norm_sq;
+          auto const f3 = U2 * std::exp(-exp_term) / nm2;
           numerator += f3 * Utils::int_pow<S>(d_op * nm);
         }
-        denominator += sz;
+        denominator += U2;
       },
       [&](unsigned dim, int n) {
         nm[dim] = shift[dim] + n * mesh;
@@ -98,14 +121,15 @@ double G_opt_dipolar(P3MParameters const &params, Utils::Vector3i const &shift,
  * over a regular grid of k vectors, and returns the values as a vector.
  *
  * @tparam S Order of the differential operator, e.g. 2 for energy, 3 for force
+ * @tparam m Number of Brillouin zones that contribute to the aliasing sums
  *
  * @param params DP3M parameters
- * @param n_start Lower left corner of the grid
- * @param n_stop Upper right corner of the grid.
+ * @param n_start Lower left corner of the grid in k-space.
+ * @param n_stop Upper right corner of the grid in k-space.
  * @param inv_box_l Inverse box length
  * @return Values of the influence function at regular grid points.
  */
-template <typename FloatType, std::size_t S>
+template <typename FloatType, std::size_t S, std::size_t m>
 std::vector<FloatType> grid_influence_function(
     P3MParameters const &params, Utils::Vector3i const &n_start,
     Utils::Vector3i const &n_stop, Utils::Vector3d const &inv_box_l) {
@@ -124,8 +148,8 @@ std::vector<FloatType> grid_influence_function(
   auto prefactor = Utils::int_pow<3>(static_cast<double>(params.mesh[0])) * 2. *
                    Utils::int_pow<2>(inv_box_l[0]);
 
-  auto const offset = detail::calc_meshift(params.mesh, false)[0];
-  auto const d_op = detail::calc_meshift(params.mesh, true)[0];
+  auto const offset = calc_p3m_mesh_shift(params.mesh, false)[0];
+  auto const d_op = calc_p3m_mesh_shift(params.mesh, true)[0];
   auto const half_mesh = params.mesh[0] / 2;
   auto indices = Utils::Vector3i{};
   auto shift_off = Utils::Vector3i{};
@@ -137,8 +161,8 @@ std::vector<FloatType> grid_influence_function(
       [&]() {
         if (((indices[0] % half_mesh != 0) or (indices[1] % half_mesh != 0) or
              (indices[2] % half_mesh != 0))) {
-          g[index] = FloatType(prefactor *
-                               G_opt_dipolar<S>(params, shift_off, d_op_off));
+          g[index] = FloatType(
+              prefactor * G_opt_dipolar<S, m>(params, shift_off, d_op_off));
         }
         ++index;
       },
@@ -150,12 +174,12 @@ std::vector<FloatType> grid_influence_function(
   return g;
 }
 
+template <std::size_t m>
 inline double G_opt_dipolar_self_energy(P3MParameters const &params,
                                         Utils::Vector3i const &shift) {
 
-  auto constexpr limit = P3M_BRILLOUIN + 1;
-  auto constexpr m_start = Utils::Vector3i::broadcast(-limit);
-  auto constexpr m_stop = Utils::Vector3i::broadcast(limit + 1);
+  auto constexpr m_start = Utils::Vector3i::broadcast(-m);
+  auto constexpr m_stop = Utils::Vector3i::broadcast(m + 1);
   auto const cao = params.cao;
   auto const mesh = params.mesh[0];
   auto const offset =
@@ -177,19 +201,21 @@ inline double G_opt_dipolar_self_energy(P3MParameters const &params,
 /**
  * @brief Calculate self-energy of the influence function.
  *
+ * @tparam m Number of Brillouin zones that contribute to the aliasing sums
+ *
  * @param params DP3M parameters
- * @param n_start Lower left corner of the grid
- * @param n_stop Upper right corner of the grid.
+ * @param n_start Lower left corner of the grid in k-space.
+ * @param n_stop Upper right corner of the grid in k-space.
  * @param g Energies on the grid.
  * @return Total self-energy.
  */
-template <typename FloatType>
+template <typename FloatType, std::size_t m>
 inline double grid_influence_function_self_energy(
     P3MParameters const &params, Utils::Vector3i const &n_start,
     Utils::Vector3i const &n_stop, std::vector<FloatType> const &g) {
 
-  auto const offset = detail::calc_meshift(params.mesh, false)[0];
-  auto const d_op = detail::calc_meshift(params.mesh, true)[0];
+  auto const offset = calc_p3m_mesh_shift(params.mesh, false)[0];
+  auto const d_op = calc_p3m_mesh_shift(params.mesh, true)[0];
   auto const half_mesh = params.mesh[0] / 2;
   auto indices = Utils::Vector3i{};
   auto shift_off = Utils::Vector3i{};
@@ -202,8 +228,8 @@ inline double grid_influence_function_self_energy(
       [&]() {
         if (((indices[0] % half_mesh != 0) or (indices[1] % half_mesh != 0) or
              (indices[2] % half_mesh != 0))) {
-          auto const U2 = G_opt_dipolar_self_energy(params, shift_off);
-          energy += double(g[index]) * U2 * d_op_off.norm2();
+          auto const U2 = G_opt_dipolar_self_energy<m>(params, shift_off);
+          energy += static_cast<double>(g[index]) * U2 * d_op_off.norm2();
         }
         ++index;
       },
diff --git a/src/core/unit_tests/p3m_test.cpp b/src/core/unit_tests/p3m_test.cpp
index 3f091d565c..eea67334c0 100644
--- a/src/core/unit_tests/p3m_test.cpp
+++ b/src/core/unit_tests/p3m_test.cpp
@@ -35,7 +35,7 @@ BOOST_AUTO_TEST_CASE(calc_meshift_false) {
        std::vector<int>{0, 1, 2, 3, -3, -2, -1}}};
 
   auto const mesh = Utils::Vector3i{{1, 4, 7}};
-  auto const val = detail::calc_meshift(mesh, false);
+  auto const val = calc_p3m_mesh_shift(mesh, false);
 
   for (std::size_t i = 0u; i < 3u; ++i) {
     for (std::size_t j = 0u; j < ref[i].size(); ++j) {
@@ -50,7 +50,7 @@ BOOST_AUTO_TEST_CASE(calc_meshift_true) {
        std::vector<int>{0, 1, 2, 0, -3, -2, -1}}};
 
   auto const mesh = Utils::Vector3i{{1, 4, 7}};
-  auto const val = detail::calc_meshift(mesh, true);
+  auto const val = calc_p3m_mesh_shift(mesh, true);
 
   for (std::size_t i = 0u; i < 3u; ++i) {
     for (std::size_t j = 0u; j < ref[i].size(); ++j) {

From 75f0812180c38cc077ce536fe8aa0ebb418e850f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Thu, 12 Dec 2024 15:31:46 +0100
Subject: [PATCH 4/4] Skip infinite values in DLC parameter tuning

---
 src/core/magnetostatics/dlc.cpp               | 18 ++++++--
 .../electrostatics/CoulombP3M.hpp             | 41 +++++++++++--------
 .../magnetostatics/DipolarP3M.hpp             | 41 +++++++++++--------
 testsuite/python/p3m_tuning_exceptions.py     |  5 ++-
 4 files changed, 66 insertions(+), 39 deletions(-)

diff --git a/src/core/magnetostatics/dlc.cpp b/src/core/magnetostatics/dlc.cpp
index 15d820d3c8..183564e818 100644
--- a/src/core/magnetostatics/dlc.cpp
+++ b/src/core/magnetostatics/dlc.cpp
@@ -50,6 +50,7 @@
 #include <cmath>
 #include <cstdio>
 #include <functional>
+#include <limits>
 #include <numbers>
 #include <numeric>
 #include <stdexcept>
@@ -430,15 +431,26 @@ double DipolarLayerCorrection::tune_far_cut() const {
   }
 
   auto constexpr limitkc = 200;
+  auto constexpr exp_max = +709.8; // for IEEE-compatible double
+  auto constexpr exp_min = -708.4; // for IEEE-compatible double
+  auto const log_max = std::log(std::numeric_limits<double>::max());
   auto const piarea = std::numbers::pi / (lx * ly);
   auto const nmp = static_cast<double>(count_magnetic_particles(system));
   auto const h = dlc.box_h;
   auto far_cut = -1.;
   for (int kc = 1; kc < limitkc; kc++) {
     auto const gc = kc * 2. * std::numbers::pi / lx;
-    auto const fa0 = sqrt(9. * exp(+2. * gc * h) * g1_DLC_dip(gc, lz - h) +
-                          9. * exp(-2. * gc * h) * g1_DLC_dip(gc, lz + h) +
-                          22. * g1_DLC_dip(gc, lz));
+    auto const pref1 = g1_DLC_dip(gc, lz - h);
+    auto const pref2 = g1_DLC_dip(gc, lz + h);
+    auto const pref0 = g1_DLC_dip(gc, lz);
+    auto const exp_term = 2. * gc * h;
+    auto const log_term1 = exp_term + std::log(9. * pref1);
+    if (exp_term >= exp_max or log_term1 >= log_max or -exp_term <= exp_min) {
+      // no solution can be found at larger k values due to overflow/underflow
+      break;
+    }
+    auto const fa0 = std::sqrt(9. * std::exp(+exp_term) * pref1 +
+                               9. * std::exp(-exp_term) * pref2 + 22. * pref0);
     auto const fa1 = sqrt(0.125 * piarea) * fa0;
     auto const fa2 = g2_DLC_dip(gc, lz);
     auto const de = nmp * mu_max_sq / (4. * (exp(gc * lz) - 1.)) * (fa1 + fa2);
diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp
index a641a6f625..5560fe7bff 100644
--- a/src/script_interface/electrostatics/CoulombP3M.hpp
+++ b/src/script_interface/electrostatics/CoulombP3M.hpp
@@ -116,28 +116,35 @@ class CoulombP3M : public Actor<CoulombP3M<Architecture>, ::CoulombP3M> {
     m_tune_verbose = get_value<bool>(params, "verbose");
     m_tune_limits = {std::nullopt, std::nullopt};
     if (params.contains("tune_limits")) {
-      std::vector<Variant> range;
-      try {
-        auto const val = get_value<std::vector<int>>(params, "tune_limits");
-        assert(val.size() == 2u);
-        range.emplace_back(val[0u]);
-        range.emplace_back(val[1u]);
-      } catch (...) {
-        range = get_value<std::vector<Variant>>(params, "tune_limits");
-        assert(range.size() == 2u);
-      }
-      if (not is_none(range[0u])) {
-        m_tune_limits.first = get_value<int>(range[0u]);
-      }
-      if (not is_none(range[1u])) {
-        m_tune_limits.second = get_value<int>(range[1u]);
+      auto const &variant = params.at("tune_limits");
+      std::size_t range_length = 0u;
+      if (is_type<std::vector<int>>(variant)) {
+        auto const range = get_value<std::vector<int>>(variant);
+        range_length = range.size();
+        if (range_length == 2u) {
+          m_tune_limits = {range[0u], range[1u]};
+        }
+      } else {
+        auto const range = get_value<std::vector<Variant>>(variant);
+        range_length = range.size();
+        if (range_length == 2u) {
+          if (not is_none(range[0u])) {
+            m_tune_limits.first = get_value<int>(range[0u]);
+          }
+          if (not is_none(range[1u])) {
+            m_tune_limits.second = get_value<int>(range[1u]);
+          }
+        }
       }
       context()->parallel_try_catch([&]() {
+        if (range_length != 2u) {
+          throw std::invalid_argument("Parameter 'tune_limits' needs 2 values");
+        }
         if (m_tune_limits.first and *m_tune_limits.first <= 0) {
-          throw std::domain_error("P3M mesh tuning limits: mesh must be > 0");
+          throw std::domain_error("Parameter 'tune_limits' must be > 0");
         }
         if (m_tune_limits.second and *m_tune_limits.second <= 0) {
-          throw std::domain_error("P3M mesh tuning limits: mesh must be > 0");
+          throw std::domain_error("Parameter 'tune_limits' must be > 0");
         }
       });
     }
diff --git a/src/script_interface/magnetostatics/DipolarP3M.hpp b/src/script_interface/magnetostatics/DipolarP3M.hpp
index 8559171a27..4436bd58c5 100644
--- a/src/script_interface/magnetostatics/DipolarP3M.hpp
+++ b/src/script_interface/magnetostatics/DipolarP3M.hpp
@@ -111,28 +111,35 @@ class DipolarP3M : public Actor<DipolarP3M<Architecture>, ::DipolarP3M> {
     m_tune_verbose = get_value<bool>(params, "verbose");
     m_tune_limits = {std::nullopt, std::nullopt};
     if (params.contains("tune_limits")) {
-      std::vector<Variant> range;
-      try {
-        auto const val = get_value<std::vector<int>>(params, "tune_limits");
-        assert(val.size() == 2u);
-        range.emplace_back(val[0u]);
-        range.emplace_back(val[1u]);
-      } catch (...) {
-        range = get_value<std::vector<Variant>>(params, "tune_limits");
-        assert(range.size() == 2u);
-      }
-      if (not is_none(range[0u])) {
-        m_tune_limits.first = get_value<int>(range[0u]);
-      }
-      if (not is_none(range[1u])) {
-        m_tune_limits.second = get_value<int>(range[1u]);
+      auto const &variant = params.at("tune_limits");
+      std::size_t range_length = 0u;
+      if (is_type<std::vector<int>>(variant)) {
+        auto const range = get_value<std::vector<int>>(variant);
+        range_length = range.size();
+        if (range_length == 2u) {
+          m_tune_limits = {range[0u], range[1u]};
+        }
+      } else {
+        auto const range = get_value<std::vector<Variant>>(variant);
+        range_length = range.size();
+        if (range_length == 2u) {
+          if (not is_none(range[0u])) {
+            m_tune_limits.first = get_value<int>(range[0u]);
+          }
+          if (not is_none(range[1u])) {
+            m_tune_limits.second = get_value<int>(range[1u]);
+          }
+        }
       }
       context()->parallel_try_catch([&]() {
+        if (range_length != 2u) {
+          throw std::invalid_argument("Parameter 'tune_limits' needs 2 values");
+        }
         if (m_tune_limits.first and *m_tune_limits.first <= 0) {
-          throw std::domain_error("P3M mesh tuning limits: mesh must be > 0");
+          throw std::domain_error("Parameter 'tune_limits' must be > 0");
         }
         if (m_tune_limits.second and *m_tune_limits.second <= 0) {
-          throw std::domain_error("P3M mesh tuning limits: mesh must be > 0");
+          throw std::domain_error("Parameter 'tune_limits' must be > 0");
         }
       });
     }
diff --git a/testsuite/python/p3m_tuning_exceptions.py b/testsuite/python/p3m_tuning_exceptions.py
index 8ccfa6f569..e7aa4e82dc 100644
--- a/testsuite/python/p3m_tuning_exceptions.py
+++ b/testsuite/python/p3m_tuning_exceptions.py
@@ -205,8 +205,9 @@ def check_invalid_params(self, container, class_solver, **custom_params):
             ('alpha', -2.0, "Parameter 'alpha' must be > 0"),
             ('accuracy', -2.0, "Parameter 'accuracy' must be > 0"),
             ('mesh', (-1, -1, -1), "Parameter 'mesh' must be > 0"),
-            ('tune_limits', (-1, 1), "P3M mesh tuning limits: mesh must be > 0"),
-            ('tune_limits', (1, 0), "P3M mesh tuning limits: mesh must be > 0"),
+            ('tune_limits', (-1,), "Parameter 'tune_limits' needs 2 values"),
+            ('tune_limits', (-1, 1), "Parameter 'tune_limits' must be > 0"),
+            ('tune_limits', (1, 0), "Parameter 'tune_limits' must be > 0"),
             ('mesh', (2, 2, 2), "Parameter 'cao' cannot be larger than 'mesh'"),
             ('mesh_off', (-2, 1, 1), "Parameter 'mesh_off' must be >= 0 and <= 1"),
         ]