From 39d5a31f601327274c165606d65b9adb8901bd1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Mon, 9 Dec 2024 17:46:52 +0100 Subject: [PATCH 1/4] Skip incompatible FFT decompositions --- src/core/electrostatics/p3m.cpp | 33 ++++++++++++++++++++++++++++++++ src/core/p3m/TuningAlgorithm.cpp | 20 ++++++++++++++----- src/core/p3m/TuningAlgorithm.hpp | 6 ++++++ 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp index 08cef7bd74..f8243bfab0 100644 --- a/src/core/electrostatics/p3m.cpp +++ b/src/core/electrostatics/p3m.cpp @@ -66,6 +66,7 @@ #include #include #include +#include #include #include @@ -632,6 +633,38 @@ class CoulombTuningAlgorithm : public TuningAlgorithm { return {}; } + std::optional fft_decomposition_veto( + Utils::Vector3i const &mesh_size_r_space) const override { +#ifdef CUDA + if constexpr (Architecture == Arch::GPU) { + return std::nullopt; + } +#endif + using Array3i = Utils::Array; + auto const [KX, KY, KZ] = p3m.fft->get_permutations(); + auto valid_decomposition = false; + Array3i mesh_size_k_space = {}; + boost::mpi::reduce( + ::comm_cart, Array3i(p3m.mesh.stop), mesh_size_k_space, + [](Array3i const &lhs, Array3i const &rhs) { + return Array3i{{std::max(lhs[0u], rhs[0u]), + std::max(lhs[1u], rhs[1u]), + std::max(lhs[2u], rhs[2u])}}; + }, + 0); + if (::this_node == 0) { + valid_decomposition = (mesh_size_r_space[0u] == mesh_size_k_space[KX] and + mesh_size_r_space[1u] == mesh_size_k_space[KY] and + mesh_size_r_space[2u] == mesh_size_k_space[KZ]); + } + boost::mpi::broadcast(::comm_cart, valid_decomposition, 0); + std::optional retval{"conflict with FFT domain decomposition"}; + if (valid_decomposition) { + retval = std::nullopt; + } + return retval; + } + std::tuple calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override { diff --git a/src/core/p3m/TuningAlgorithm.cpp b/src/core/p3m/TuningAlgorithm.cpp index ab6ad80c9d..2fd542e4ec 100644 --- a/src/core/p3m/TuningAlgorithm.cpp +++ b/src/core/p3m/TuningAlgorithm.cpp @@ -49,6 +49,8 @@ static auto constexpr P3M_TUNE_CAO_TOO_LARGE = 1.; static auto constexpr P3M_TUNE_ELC_GAP_SIZE = 2.; /** could not achieve target accuracy */ static auto constexpr P3M_TUNE_ACCURACY_TOO_LARGE = 3.; +/** conflict with FFT domain decomposition */ +static auto constexpr P3M_TUNE_FFT_MESH_SIZE = 4.; /**@}*/ /** @brief Precision threshold for a non-zero real-space cutoff. */ @@ -110,7 +112,7 @@ void TuningAlgorithm::commit(Utils::Vector3i const &mesh, int cao, * @param[in,out] tuned_accuracy @copybrief P3MParameters::accuracy * * @returns The integration time in case of success, otherwise - * -@ref P3M_TUNE_ACCURACY_TOO_LARGE, + * -@ref P3M_TUNE_ACCURACY_TOO_LARGE, -@ref P3M_TUNE_FFT_MESH_SIZE, * -@ref P3M_TUNE_CAO_TOO_LARGE, or -@ref P3M_TUNE_ELC_GAP_SIZE */ double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao, @@ -171,17 +173,25 @@ double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao, * we know that the desired minimal accuracy is obtained */ tuned_r_cut_iL = r_cut_iL = r_cut_iL_max; + auto const report_veto = [&](auto const &veto) { + if (veto) { + m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L, + tuned_accuracy, rs_err, ks_err); + } + return static_cast(veto); + }; + /* if we are running P3M+ELC, check that r_cut is compatible */ auto const r_cut = r_cut_iL * box_geo.length()[0]; - auto const veto = layer_correction_veto_r_cut(r_cut); - if (veto) { - m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L, - tuned_accuracy, rs_err, ks_err); + if (report_veto(layer_correction_veto_r_cut(r_cut))) { return -P3M_TUNE_ELC_GAP_SIZE; } commit(mesh, cao, r_cut_iL, tuned_alpha_L); on_solver_change(); + if (report_veto(fft_decomposition_veto(mesh))) { + return -P3M_TUNE_FFT_MESH_SIZE; + } auto const int_time = benchmark_integration_step(m_system, m_timings); std::tie(tuned_accuracy, rs_err, ks_err, tuned_alpha_L) = diff --git a/src/core/p3m/TuningAlgorithm.hpp b/src/core/p3m/TuningAlgorithm.hpp index 0b4d87cdad..cd07355313 100644 --- a/src/core/p3m/TuningAlgorithm.hpp +++ b/src/core/p3m/TuningAlgorithm.hpp @@ -147,6 +147,12 @@ class TuningAlgorithm { virtual std::optional layer_correction_veto_r_cut(double r_cut) const = 0; + /** @brief Veto FFT decomposition in non-cubic boxes. */ + virtual std::optional + fft_decomposition_veto(Utils::Vector3i const &) const { + return std::nullopt; + } + /** @brief Write tuned parameters to the P3M parameter struct. */ void commit(Utils::Vector3i const &mesh, int cao, double r_cut_iL, double alpha_L); From 13c22b4788e9fd9b0c4c637bde79c8e0913f39a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Tue, 10 Dec 2024 19:27:44 +0100 Subject: [PATCH 2/4] Add allowed range for P3M mesh tuning --- src/core/electrostatics/p3m.cpp | 19 +++++++-- src/core/electrostatics/p3m.impl.hpp | 6 ++- src/core/magnetostatics/dp3m.cpp | 16 ++++++-- src/core/magnetostatics/dp3m.impl.hpp | 7 +++- .../EspressoSystemStandAlone_test.cpp | 9 ++++- src/python/espressomd/electrostatics.py | 8 ++++ src/python/espressomd/magnetostatics.py | 3 ++ .../electrostatics/CoulombP3M.hpp | 40 ++++++++++++++++++- .../magnetostatics/DipolarP3M.hpp | 40 ++++++++++++++++++- testsuite/python/p3m_tuning_exceptions.py | 30 ++++++++++++-- testsuite/python/save_checkpoint.py | 2 + testsuite/python/test_checkpoint.py | 6 ++- 12 files changed, 167 insertions(+), 19 deletions(-) diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp index f8243bfab0..0826e3871b 100644 --- a/src/core/electrostatics/p3m.cpp +++ b/src/core/electrostatics/p3m.cpp @@ -598,14 +598,17 @@ class CoulombTuningAlgorithm : public TuningAlgorithm { double m_mesh_density_min = -1., m_mesh_density_max = -1.; // indicates if mesh should be tuned bool m_tune_mesh = false; + std::pair, std::optional> m_tune_limits; protected: P3MParameters &get_params() override { return p3m.params; } public: CoulombTuningAlgorithm(System::System &system, auto &input_p3m, - double prefactor, int timings) - : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m} {} + double prefactor, int timings, + decltype(m_tune_limits) tune_limits) + : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m}, + m_tune_limits{std::move(tune_limits)} {} void on_solver_change() const override { m_system.on_coulomb_change(); } @@ -722,6 +725,16 @@ class CoulombTuningAlgorithm : public TuningAlgorithm { max_npart_per_dim, std::cbrt(static_cast(p3m.sum_qpart))); m_mesh_density_min = min_npart_per_dim / normalized_box_dim; m_mesh_density_max = max_npart_per_dim / normalized_box_dim; + if (m_tune_limits.first or m_tune_limits.second) { + auto const &box_l = box_geo.length(); + auto const dim = std::max({box_l[0], box_l[1], box_l[2]}); + if (m_tune_limits.first) { + m_mesh_density_min = static_cast(*m_tune_limits.first) / dim; + } + if (m_tune_limits.second) { + m_mesh_density_max = static_cast(*m_tune_limits.second) / dim; + } + } m_tune_mesh = true; } else { m_mesh_density_min = m_mesh_density_max = mesh_density; @@ -805,7 +818,7 @@ void CoulombP3MImpl::tune() { } try { CoulombTuningAlgorithm parameters( - system, p3m, prefactor, tune_timings); + system, p3m, prefactor, tune_timings, tune_limits); parameters.setup_logger(tune_verbose); // parameter ranges parameters.determine_mesh_limits(); diff --git a/src/core/electrostatics/p3m.impl.hpp b/src/core/electrostatics/p3m.impl.hpp index 8f8fb6d92f..de507df26e 100644 --- a/src/core/electrostatics/p3m.impl.hpp +++ b/src/core/electrostatics/p3m.impl.hpp @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -69,6 +70,7 @@ struct CoulombP3MImpl : public CoulombP3M { /** @brief Coulomb P3M meshes and FFT algorithm. */ std::unique_ptr> p3m_impl; int tune_timings; + std::pair, std::optional> tune_limits; bool tune_verbose; bool check_complex_residuals; bool m_is_tuned; @@ -77,10 +79,10 @@ struct CoulombP3MImpl : public CoulombP3M { CoulombP3MImpl( std::unique_ptr> &&p3m_handle, double prefactor, int tune_timings, bool tune_verbose, - bool check_complex_residuals) + decltype(tune_limits) tune_limits, bool check_complex_residuals) : CoulombP3M(p3m_handle->params), p3m{*p3m_handle}, p3m_impl{std::move(p3m_handle)}, tune_timings{tune_timings}, - tune_verbose{tune_verbose}, + tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose}, check_complex_residuals{check_complex_residuals} { if (tune_timings <= 0) { diff --git a/src/core/magnetostatics/dp3m.cpp b/src/core/magnetostatics/dp3m.cpp index 8855a8e4d8..f59613943c 100644 --- a/src/core/magnetostatics/dp3m.cpp +++ b/src/core/magnetostatics/dp3m.cpp @@ -74,6 +74,7 @@ #include #include #include +#include #include #ifdef FFTW3_H @@ -534,11 +535,14 @@ template class DipolarTuningAlgorithm : public TuningAlgorithm { p3m_data_struct_dipoles &dp3m; int m_mesh_max = -1, m_mesh_min = -1; + std::pair, std::optional> m_tune_limits; public: DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, - double prefactor, int timings) - : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m} {} + double prefactor, int timings, + decltype(m_tune_limits) tune_limits) + : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m}, + m_tune_limits{std::move(tune_limits)} {} P3MParameters &get_params() override { return dp3m.params; } @@ -603,6 +607,12 @@ class DipolarTuningAlgorithm : public TuningAlgorithm { m_mesh_min = static_cast(std::round(std::pow(2., std::floor(expo)))); /* avoid using more than 1 GB of FFT arrays */ m_mesh_max = 128; + if (m_tune_limits.first) { + m_mesh_min = *m_tune_limits.first; + } + if (m_tune_limits.second) { + m_mesh_max = *m_tune_limits.second; + } } else { m_mesh_min = m_mesh_max = dp3m.params.mesh[0]; m_logger->report_fixed_mesh(dp3m.params.mesh); @@ -662,7 +672,7 @@ void DipolarP3MImpl::tune() { } try { DipolarTuningAlgorithm parameters( - system, dp3m, prefactor, tune_timings); + system, dp3m, prefactor, tune_timings, tune_limits); parameters.setup_logger(tune_verbose); // parameter ranges parameters.determine_mesh_limits(); diff --git a/src/core/magnetostatics/dp3m.impl.hpp b/src/core/magnetostatics/dp3m.impl.hpp index ee50a2bd9d..406ca1fc3a 100644 --- a/src/core/magnetostatics/dp3m.impl.hpp +++ b/src/core/magnetostatics/dp3m.impl.hpp @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -72,16 +73,18 @@ struct DipolarP3MImpl : public DipolarP3M { /** @brief Coulomb P3M meshes and FFT algorithm. */ std::unique_ptr> dp3m_impl; int tune_timings; + std::pair, std::optional> tune_limits; bool tune_verbose; bool m_is_tuned; public: DipolarP3MImpl( std::unique_ptr> &&dp3m_handle, - double prefactor, int tune_timings, bool tune_verbose) + double prefactor, int tune_timings, bool tune_verbose, + decltype(tune_limits) tune_limits) : DipolarP3M(dp3m_handle->params), dp3m{*dp3m_handle}, dp3m_impl{std::move(dp3m_handle)}, tune_timings{tune_timings}, - tune_verbose{tune_verbose} { + tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose} { if (tune_timings <= 0) { throw std::domain_error("Parameter 'timings' must be > 0"); diff --git a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp index 829aedbfc6..47d9a3d515 100644 --- a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp +++ b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp @@ -75,6 +75,7 @@ namespace utf = boost::unit_test; #include #include #include +#include #include #include #include @@ -228,6 +229,8 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) { // set up P3M auto const prefactor = 2.; + auto const mesh_range = std::pair, std::optional>{ + std::nullopt, std::nullopt}; auto p3m = P3MParameters{false, 0.0, 3.5, @@ -238,7 +241,7 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) { 1e-3}; auto solver = new_p3m_handle( - std::move(p3m), prefactor, 1, false, true); + std::move(p3m), prefactor, 1, false, mesh_range, true); add_actor(comm, espresso::system, system.coulomb.impl->solver, solver, [&system]() { system.on_coulomb_change(); }); BOOST_CHECK(not solver->is_gpu()); @@ -297,6 +300,8 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) { // set up P3M auto const prefactor = 2.; + auto const mesh_range = std::pair, std::optional>{ + std::nullopt, std::nullopt}; auto p3m = P3MParameters{false, 0.0, 3.5, @@ -307,7 +312,7 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) { 1e-3}; auto solver = new_dp3m_handle( - std::move(p3m), prefactor, 1, false); + std::move(p3m), prefactor, 1, false, mesh_range); add_actor(comm, espresso::system, system.dipoles.impl->solver, solver, [&system]() { system.on_dipoles_change(); }); BOOST_CHECK(not solver->is_gpu()); diff --git a/src/python/espressomd/electrostatics.py b/src/python/espressomd/electrostatics.py index 87b23f0090..d3544734a4 100644 --- a/src/python/espressomd/electrostatics.py +++ b/src/python/espressomd/electrostatics.py @@ -209,6 +209,10 @@ class P3M(_P3MBase): tune : :obj:`bool`, optional Used to activate/deactivate the tuning method on activation. Defaults to ``True``. + tune_limits : (2,) array_like of :obj:`int`, optional + Lower and upper limits (inclusive) for the mesh size during tuning, + along the largest box dimension. Use ``None`` to not impose a limit. + Defaults to ``[None, None]``. timings : :obj:`int` Number of force calculations during tuning. verbose : :obj:`bool`, optional @@ -266,6 +270,10 @@ class P3MGPU(_P3MBase): Defaults to ``True``. timings : :obj:`int` Number of force calculations during tuning. + tune_limits : (2,) array_like of :obj:`int`, optional + Lower and upper limits (inclusive) for the mesh size during tuning, + along the largest box dimension. Use ``None`` to not impose a limit. + Defaults to ``[None, None]``. verbose : :obj:`bool`, optional If ``False``, disable log output during tuning. check_neutrality : :obj:`bool`, optional diff --git a/src/python/espressomd/magnetostatics.py b/src/python/espressomd/magnetostatics.py index 0132dd32a0..1884ea884c 100644 --- a/src/python/espressomd/magnetostatics.py +++ b/src/python/espressomd/magnetostatics.py @@ -104,6 +104,9 @@ class DipolarP3M(MagnetostaticInteraction): tune : :obj:`bool`, optional Activate/deactivate the tuning method on activation (default is ``True``, i.e., activated). + tune_limits : (2,) array_like of :obj:`int`, optional + Lower and upper limits (inclusive) for the mesh size during tuning. + Use ``None`` to not impose a limit. Defaults to ``[None, None]``. timings : :obj:`int` Number of force calculations during tuning. single_precision : :obj:`bool` diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp index 5e16440de9..a641a6f625 100644 --- a/src/script_interface/electrostatics/CoulombP3M.hpp +++ b/src/script_interface/electrostatics/CoulombP3M.hpp @@ -33,6 +33,7 @@ #include "script_interface/get_value.hpp" #include +#include #include #include #include @@ -51,6 +52,7 @@ class CoulombP3M : public Actor, ::CoulombP3M> { bool m_tune_verbose; bool m_check_complex_residuals; bool m_single_precision; + std::pair, std::optional> m_tune_limits; public: using Base = Actor, ::CoulombP3M>; @@ -93,6 +95,15 @@ class CoulombP3M : public Actor, ::CoulombP3M> { [this]() { return m_tune_verbose; }}, {"timings", AutoParameter::read_only, [this]() { return m_tune_timings; }}, + {"tune_limits", AutoParameter::read_only, + [this]() { + auto const &[range_min, range_max] = m_tune_limits; + std::vector retval = { + range_min ? Variant{*range_min} : Variant{None{}}, + range_max ? Variant{*range_max} : Variant{None{}}, + }; + return retval; + }}, {"tune", AutoParameter::read_only, [this]() { return m_tune; }}, {"check_complex_residuals", AutoParameter::read_only, [this]() { return m_check_complex_residuals; }}, @@ -103,6 +114,33 @@ class CoulombP3M : public Actor, ::CoulombP3M> { m_tune = get_value(params, "tune"); m_tune_timings = get_value(params, "timings"); m_tune_verbose = get_value(params, "verbose"); + m_tune_limits = {std::nullopt, std::nullopt}; + if (params.contains("tune_limits")) { + std::vector range; + try { + auto const val = get_value>(params, "tune_limits"); + assert(val.size() == 2u); + range.emplace_back(val[0u]); + range.emplace_back(val[1u]); + } catch (...) { + range = get_value>(params, "tune_limits"); + assert(range.size() == 2u); + } + if (not is_none(range[0u])) { + m_tune_limits.first = get_value(range[0u]); + } + if (not is_none(range[1u])) { + m_tune_limits.second = get_value(range[1u]); + } + context()->parallel_try_catch([&]() { + if (m_tune_limits.first and *m_tune_limits.first <= 0) { + throw std::domain_error("P3M mesh tuning limits: mesh must be > 0"); + } + if (m_tune_limits.second and *m_tune_limits.second <= 0) { + throw std::domain_error("P3M mesh tuning limits: mesh must be > 0"); + } + }); + } m_check_complex_residuals = get_value(params, "check_complex_residuals"); auto const single_precision = get_value(params, "single_precision"); @@ -121,7 +159,7 @@ class CoulombP3M : public Actor, ::CoulombP3M> { get_value(params, "accuracy")}; make_handle(single_precision, std::move(p3m), get_value(params, "prefactor"), m_tune_timings, - m_tune_verbose, m_check_complex_residuals); + m_tune_verbose, m_tune_limits, m_check_complex_residuals); }); set_charge_neutrality_tolerance(params); } diff --git a/src/script_interface/magnetostatics/DipolarP3M.hpp b/src/script_interface/magnetostatics/DipolarP3M.hpp index 67a2c2205d..8559171a27 100644 --- a/src/script_interface/magnetostatics/DipolarP3M.hpp +++ b/src/script_interface/magnetostatics/DipolarP3M.hpp @@ -33,6 +33,7 @@ #include "script_interface/get_value.hpp" #include +#include #include #include #include @@ -47,6 +48,7 @@ namespace Dipoles { template class DipolarP3M : public Actor, ::DipolarP3M> { int m_tune_timings; + std::pair, std::optional> m_tune_limits; bool m_tune; bool m_tune_verbose; @@ -90,6 +92,15 @@ class DipolarP3M : public Actor, ::DipolarP3M> { [this]() { return m_tune_verbose; }}, {"timings", AutoParameter::read_only, [this]() { return m_tune_timings; }}, + {"tune_limits", AutoParameter::read_only, + [this]() { + auto const &[range_min, range_max] = m_tune_limits; + std::vector retval = { + range_min ? Variant{*range_min} : Variant{None{}}, + range_max ? Variant{*range_max} : Variant{None{}}, + }; + return retval; + }}, {"tune", AutoParameter::read_only, [this]() { return m_tune; }}, }); } @@ -98,6 +109,33 @@ class DipolarP3M : public Actor, ::DipolarP3M> { m_tune = get_value(params, "tune"); m_tune_timings = get_value(params, "timings"); m_tune_verbose = get_value(params, "verbose"); + m_tune_limits = {std::nullopt, std::nullopt}; + if (params.contains("tune_limits")) { + std::vector range; + try { + auto const val = get_value>(params, "tune_limits"); + assert(val.size() == 2u); + range.emplace_back(val[0u]); + range.emplace_back(val[1u]); + } catch (...) { + range = get_value>(params, "tune_limits"); + assert(range.size() == 2u); + } + if (not is_none(range[0u])) { + m_tune_limits.first = get_value(range[0u]); + } + if (not is_none(range[1u])) { + m_tune_limits.second = get_value(range[1u]); + } + context()->parallel_try_catch([&]() { + if (m_tune_limits.first and *m_tune_limits.first <= 0) { + throw std::domain_error("P3M mesh tuning limits: mesh must be > 0"); + } + if (m_tune_limits.second and *m_tune_limits.second <= 0) { + throw std::domain_error("P3M mesh tuning limits: mesh must be > 0"); + } + }); + } auto const single_precision = get_value(params, "single_precision"); static_assert(Architecture == Arch::CPU, "GPU not implemented"); context()->parallel_try_catch([&]() { @@ -111,7 +149,7 @@ class DipolarP3M : public Actor, ::DipolarP3M> { get_value(params, "accuracy")}; make_handle(single_precision, std::move(p3m), get_value(params, "prefactor"), m_tune_timings, - m_tune_verbose); + m_tune_verbose, m_tune_limits); }); } diff --git a/testsuite/python/p3m_tuning_exceptions.py b/testsuite/python/p3m_tuning_exceptions.py index ac31c7ddca..8ccfa6f569 100644 --- a/testsuite/python/p3m_tuning_exceptions.py +++ b/testsuite/python/p3m_tuning_exceptions.py @@ -205,6 +205,8 @@ def check_invalid_params(self, container, class_solver, **custom_params): ('alpha', -2.0, "Parameter 'alpha' must be > 0"), ('accuracy', -2.0, "Parameter 'accuracy' must be > 0"), ('mesh', (-1, -1, -1), "Parameter 'mesh' must be > 0"), + ('tune_limits', (-1, 1), "P3M mesh tuning limits: mesh must be > 0"), + ('tune_limits', (1, 0), "P3M mesh tuning limits: mesh must be > 0"), ('mesh', (2, 2, 2), "Parameter 'cao' cannot be larger than 'mesh'"), ('mesh_off', (-2, 1, 1), "Parameter 'mesh_off' must be >= 0 and <= 1"), ] @@ -421,9 +423,20 @@ def test_09_no_errors_p3m_cpu(self): # tuning with cao or r_cut or mesh constrained, or without constraints for key, value in valid_params.items(): solver = espressomd.electrostatics.P3M( - prefactor=2, accuracy=1e-2, epsilon=0.0, **{key: value}) + prefactor=2, accuracy=1e-2, epsilon=0.0, + tune_limits=[2, 20], **{key: value}) self.system.electrostatics.solver = solver self.system.electrostatics.solver = None + # tuning with mesh range constraint + for lower_limit in [None, 6]: + solver = espressomd.electrostatics.P3M( + prefactor=2, accuracy=1e-2, epsilon=0.0, + tune_limits=[lower_limit, 8], **{key: value}) + self.system.electrostatics.solver = solver + if lower_limit is not None: + for i in range(3): + self.assertIn(solver.mesh[i], [6, 8]) + self.system.electrostatics.solver = None @utx.skipIfMissingFeatures("DP3M") def test_09_no_errors_dp3m_cpu(self): @@ -437,9 +450,20 @@ def test_09_no_errors_dp3m_cpu(self): # tuning with cao or r_cut or mesh constrained, or without constraints for key, value in valid_params.items(): solver = espressomd.magnetostatics.DipolarP3M( - prefactor=2, accuracy=1e-2, **{key: value}) + prefactor=2, accuracy=1e-2, + tune_limits=[3, 17], **{key: value}) self.system.magnetostatics.solver = solver - self.system.magnetostatics.clear() + self.system.magnetostatics.solver = None + # tuning with mesh range constraint + for lower_limit in [None, 3]: + solver = espressomd.magnetostatics.DipolarP3M( + prefactor=2, accuracy=1e-2, + tune_limits=[lower_limit, 5], **{key: value}) + self.system.magnetostatics.solver = solver + if lower_limit is not None: + for i in range(3): + self.assertIn(solver.mesh[i], [3, 5]) + self.system.magnetostatics.solver = None @utx.skipIfMissingFeatures("P3M") def test_09_no_errors_p3m_cpu_rescale_mesh(self): diff --git a/testsuite/python/save_checkpoint.py b/testsuite/python/save_checkpoint.py index 28c5950a3c..504ec63546 100644 --- a/testsuite/python/save_checkpoint.py +++ b/testsuite/python/save_checkpoint.py @@ -154,6 +154,7 @@ r_cut=1.0, check_complex_residuals=False, timings=15, + tune_limits=[8, 12], tune=False) if 'ELC' in modes: elc = espressomd.electrostatics.ELC( @@ -350,6 +351,7 @@ accuracy=0.01, single_precision=True, timings=15, + tune_limits=[11, 15], tune=False) system.magnetostatics.solver = dp3m diff --git a/testsuite/python/test_checkpoint.py b/testsuite/python/test_checkpoint.py index ed31ebc8d6..f2193a9c7c 100644 --- a/testsuite/python/test_checkpoint.py +++ b/testsuite/python/test_checkpoint.py @@ -835,8 +835,8 @@ def test_dp3m(self): state = actor.get_params() reference = {'prefactor': 1.0, 'accuracy': 0.01, 'mesh': 3 * [8], 'cao': 1, 'alpha': 12.0, 'r_cut': 2.4, 'tune': False, - 'mesh_off': [0.5, 0.5, 0.5], 'epsilon': 2.0, - 'timings': 15, 'single_precision': True} + 'mesh_off': [0.5, 0.5, 0.5], 'epsilon': 2.0, 'timings': 15, + 'tune_limits': [11, 15], 'single_precision': True} for key in reference: self.assertIn(key, state) np.testing.assert_almost_equal(state[key], reference[key], @@ -852,6 +852,7 @@ def test_p3m(self): reference = {'prefactor': 1.0, 'accuracy': 0.1, 'mesh': 3 * [10], 'cao': 1, 'alpha': 1.0, 'r_cut': 1.0, 'tune': False, 'timings': 15, 'check_neutrality': True, + 'tune_limits': [8, 12], 'single_precision': single_precision, 'check_complex_residuals': False, 'charge_neutrality_tolerance': 1e-12} @@ -870,6 +871,7 @@ def test_elc(self): p3m_reference = {'prefactor': 1.0, 'accuracy': 0.1, 'mesh': 3 * [10], 'cao': 1, 'alpha': 1.0, 'r_cut': 1.0, 'tune': False, 'timings': 15, 'check_neutrality': True, + 'tune_limits': [8, 12], 'check_complex_residuals': False, 'charge_neutrality_tolerance': 7e-12} elc_reference = {'gap_size': 6.0, 'maxPWerror': 0.1, From f7f164e5db209d153823e739902d95c5e2c5b30c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 11 Dec 2024 17:33:30 +0100 Subject: [PATCH 3/4] Refactor P3M code Use the same variable names in P3M and DP3M influence functions, move their Brillouin limit to a template parameter, and improve their documentation. Avoid division by zero in ELC with constant potential. --- doc/doxygen/Doxyfile.in | 2 +- src/core/electrostatics/elc.cpp | 8 +- src/core/electrostatics/p3m.cpp | 4 +- src/core/magnetostatics/dp3m.cpp | 9 +- src/core/p3m/common.hpp | 11 ++- src/core/p3m/data_struct.hpp | 2 +- src/core/p3m/influence_function.hpp | 73 ++++++++++----- src/core/p3m/influence_function_dipolar.hpp | 98 +++++++++++++-------- src/core/unit_tests/p3m_test.cpp | 4 +- 9 files changed, 131 insertions(+), 80 deletions(-) diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in index 67333f21af..b6c0a0e1b2 100644 --- a/doc/doxygen/Doxyfile.in +++ b/doc/doxygen/Doxyfile.in @@ -1511,7 +1511,7 @@ MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. -MATHJAX_EXTENSIONS = +MATHJAX_EXTENSIONS = TeX/AMSmath # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site diff --git a/src/core/electrostatics/elc.cpp b/src/core/electrostatics/elc.cpp index a7988e84db..4e28654ae1 100644 --- a/src/core/electrostatics/elc.cpp +++ b/src/core/electrostatics/elc.cpp @@ -368,10 +368,6 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const { auto const &box_geo = *get_system().box_geo; auto const xy_area_inv = box_geo.length_inv()[0] * box_geo.length_inv()[1]; auto const pref = prefactor * 2. * std::numbers::pi * xy_area_inv; - auto const delta = elc.delta_mid_top * elc.delta_mid_bot; - auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta); - auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta); - auto const fac_delta = delta / (1. - delta); /* for non-neutral systems, this shift gives the background contribution * (rsp. for this shift, the DM of the background is zero) */ @@ -397,6 +393,10 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const { } } else { // metallic boundaries + auto const delta = elc.delta_mid_top * elc.delta_mid_bot; + auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta); + auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta); + auto const fac_delta = delta / (1. - delta); clear_vec(gblcblk, size); auto const h = elc.box_h; ImageSum const image_sum{delta, shift, lz}; diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp index 0826e3871b..99edb97c4c 100644 --- a/src/core/electrostatics/p3m.cpp +++ b/src/core/electrostatics/p3m.cpp @@ -127,7 +127,7 @@ void CoulombP3MImpl::count_charged_particles() { template void CoulombP3MImpl::calc_influence_function_force() { auto const [KX, KY, KZ] = p3m.fft->get_permutations(); - p3m.g_force = grid_influence_function( + p3m.g_force = grid_influence_function( p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ, get_system().box_geo->length_inv()); } @@ -138,7 +138,7 @@ void CoulombP3MImpl::calc_influence_function_force() { template void CoulombP3MImpl::calc_influence_function_energy() { auto const [KX, KY, KZ] = p3m.fft->get_permutations(); - p3m.g_energy = grid_influence_function( + p3m.g_energy = grid_influence_function( p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ, get_system().box_geo->length_inv()); } diff --git a/src/core/magnetostatics/dp3m.cpp b/src/core/magnetostatics/dp3m.cpp index f59613943c..1310ff24a7 100644 --- a/src/core/magnetostatics/dp3m.cpp +++ b/src/core/magnetostatics/dp3m.cpp @@ -116,8 +116,9 @@ double DipolarP3MImpl::calc_average_self_energy_k_space() const { auto const &box_geo = *get_system().box_geo; - auto const node_phi = grid_influence_function_self_energy( - dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy); + auto const node_phi = + grid_influence_function_self_energy( + dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy); double phi = 0.; boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0); @@ -519,14 +520,14 @@ double DipolarP3MImpl::calc_surface_term( template void DipolarP3MImpl::calc_influence_function_force() { - dp3m.g_force = grid_influence_function( + dp3m.g_force = grid_influence_function( dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, get_system().box_geo->length_inv()); } template void DipolarP3MImpl::calc_influence_function_energy() { - dp3m.g_energy = grid_influence_function( + dp3m.g_energy = grid_influence_function( dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, get_system().box_geo->length_inv()); } diff --git a/src/core/p3m/common.hpp b/src/core/p3m/common.hpp index 3cb9c1cb45..1f0a0a1739 100644 --- a/src/core/p3m/common.hpp +++ b/src/core/p3m/common.hpp @@ -53,6 +53,7 @@ auto constexpr P3M_EPSILON_METALLIC = 0.0; #include #include +/** @brief P3M kernel architecture. */ enum class Arch { CPU, GPU }; /** @brief Structure to hold P3M parameters and some dependent variables. */ @@ -65,8 +66,8 @@ struct P3MParameters { /** cutoff radius for real space electrostatics (>0), rescaled to * @p r_cut_iL = @p r_cut * @p box_l_i. */ double r_cut_iL; - /** number of mesh points per coordinate direction (>0). */ - Utils::Vector3i mesh = {}; + /** number of mesh points per coordinate direction (>0), in real space. */ + Utils::Vector3i mesh; /** offset of the first mesh point (lower left corner) from the * coordinate origin ([0,1[). */ Utils::Vector3d mesh_off; @@ -237,15 +238,14 @@ template struct P3MFFTMesh { #endif // defined(P3M) or defined(DP3M) -namespace detail { -/** Calculate indices that shift @ref P3MParameters::mesh "mesh" by `mesh/2`. +/** @brief Calculate indices that shift @ref P3MParameters::mesh by `mesh/2`. * For each mesh size @f$ n @f$ in @c mesh_size, create a sequence of integer * values @f$ \left( 0, \ldots, \lfloor n/2 \rfloor, -\lfloor n/2 \rfloor, * \ldots, -1\right) @f$ if @c zero_out_midpoint is false, otherwise * @f$ \left( 0, \ldots, \lfloor n/2 - 1 \rfloor, 0, -\lfloor n/2 \rfloor, * \ldots, -1\right) @f$. */ -std::array, 3> inline calc_meshift( +std::array, 3> inline calc_p3m_mesh_shift( Utils::Vector3i const &mesh_size, bool zero_out_midpoint = false) { std::array, 3> ret{}; @@ -262,4 +262,3 @@ std::array, 3> inline calc_meshift( return ret; } -} // namespace detail diff --git a/src/core/p3m/data_struct.hpp b/src/core/p3m/data_struct.hpp index 9bd81ad7c2..35e1cffc2f 100644 --- a/src/core/p3m/data_struct.hpp +++ b/src/core/p3m/data_struct.hpp @@ -66,7 +66,7 @@ template struct p3m_data_struct { * i.e. the prefactor @f$ 2i\pi/L @f$ is missing! */ void calc_differential_operator() { - d_op = detail::calc_meshift(params.mesh, true); + d_op = calc_p3m_mesh_shift(params.mesh, true); } /** @brief Force optimised influence function (k-space) */ diff --git a/src/core/p3m/influence_function.hpp b/src/core/p3m/influence_function.hpp index c3373a88d1..c3487578cb 100644 --- a/src/core/p3m/influence_function.hpp +++ b/src/core/p3m/influence_function.hpp @@ -19,6 +19,10 @@ #pragma once +#include "config/config.hpp" + +#if defined(P3M) + #include "p3m/common.hpp" #include "p3m/for_each_3d.hpp" #include "p3m/math.hpp" @@ -36,34 +40,54 @@ #include /** - * @brief Hockney/Eastwood/Ballenegger optimal influence function. + * @brief Calculate the aliasing sums for the optimal influence function. * * This implements Eq. 30 of @cite cerda08d, which can be used * for monopole and dipole P3M by choosing the appropriate S factor. * - * @tparam S Order of the differential operator, e.g. 0 for potential, - * 1 for electric field, ... - * @tparam m Number of aliasing terms to take into account. + * The full equation is: + * @f{align*}{ + * G_{\text{opt}}(\vec{n}, S, \text{cao}) &= + * \displaystyle\frac{4\pi}{\sum_\vec{m} U^2} + * \sum_\vec{m}U^2 + * \displaystyle\frac{\left(\vec{k}\odot\vec{k}_m\right)^S} + * {|\vec{k}_m|^2} + * e^{-1/(2\alpha)^2|\vec{k}_m|^2} \\ + * + * U &= \operatorname{det}\left[I_3\cdot\operatorname{sinc}\left( + * \frac{\vec{k}_m\odot\vec{a}}{2\pi}\right)\right]^{\text{cao}} \\ + * + * \vec{k} &= \frac{2\pi}{\vec{N}\odot\vec{a}}\vec{s}[\vec{n}] \\ * - * @param cao Charge assignment order. - * @param alpha Ewald splitting parameter. - * @param k k-vector to evaluate the function for. - * @param h Grid spacing. + * \vec{k}_m &= \frac{2\pi}{\vec{N}\odot\vec{a}} + * \left(\vec{s}[\vec{n}]+\vec{m}\odot\vec{N}\right) + * @f} + * + * with @f$ I_3 @f$ the 3x3 identity matrix, @f$ \vec{N} @f$ the global mesh + * size in k-space coordinates, @f$ \vec{a} @f$ the mesh size, @f$ \vec{m} @f$ + * the Brillouin zone coordinates, @f$ \odot @f$ the Hadamard product. + * + * @tparam S Order of the differential operator (0 for potential, 1 for E-field) + * @tparam m Number of Brillouin zones that contribute to the aliasing sums + * + * @param params P3M parameters + * @param k k-vector to evaluate the function for. + * @return The optimal influence function for a specific k-vector. */ template -double G_opt(int cao, double alpha, Utils::Vector3d const &k, - Utils::Vector3d const &h) { +double G_opt(P3MParameters const ¶ms, Utils::Vector3d const &k) { auto const k2 = k.norm2(); if (k2 == 0.) { return 0.; } - auto constexpr limit = 30.; + auto constexpr exp_limit = 30.; auto constexpr m_start = Utils::Vector3i::broadcast(-m); auto constexpr m_stop = Utils::Vector3i::broadcast(m + 1); - auto const exponent_prefactor = Utils::sqr(1. / (2. * alpha)); - auto const wavevector = (2. * std::numbers::pi) / h; + auto const cao = params.cao; + auto const exponent_prefactor = Utils::sqr(1. / (2. * params.alpha)); + auto const wavevector = (2. * std::numbers::pi) / params.a; auto const wavevector_i = 1. / wavevector; auto indices = Utils::Vector3i{}; auto km = Utils::Vector3d{}; @@ -76,9 +100,9 @@ double G_opt(int cao, double alpha, Utils::Vector3d const &k, [&]() { auto const U2 = std::pow(Utils::product(fnm), 2 * cao); auto const km2 = km.norm2(); - auto const exponent = exponent_prefactor * km2; - if (exponent < limit) { - auto const f3 = std::exp(-exponent) * (4. * std::numbers::pi / km2); + auto const exp_term = exponent_prefactor * km2; + if (exp_term < exp_limit) { + auto const f3 = std::exp(-exp_term) * (4. * std::numbers::pi / km2); numerator += U2 * f3 * Utils::int_pow(k * km); } denominator += U2; @@ -97,26 +121,25 @@ double G_opt(int cao, double alpha, Utils::Vector3d const &k, * This evaluates the optimal influence function @ref G_opt * over a regular grid of k vectors, and returns the values as a vector. * - * @tparam S Order of the differential operator, e.g. 0 for potential, - * 1 for electric field... - * @tparam m Number of aliasing terms to take into account. + * @tparam S Order of the differential operator (0 for potential, 1 for E-field) + * @tparam m Number of Brillouin zones that contribute to the aliasing sums * * @param params P3M parameters. - * @param n_start Lower left corner of the grid. - * @param n_stop Upper right corner of the grid. + * @param n_start Lower left corner of the grid in k-space. + * @param n_stop Upper right corner of the grid in k-space. * @param KX k-space x-axis index. * @param KY k-space y-axis index. * @param KZ k-space z-axis index. * @param inv_box_l Inverse box length. * @return Values of G_opt at regular grid points. */ -template +template std::vector grid_influence_function( P3MParameters const ¶ms, Utils::Vector3i const &n_start, Utils::Vector3i const &n_stop, int const KX, int const KY, int const KZ, Utils::Vector3d const &inv_box_l) { - auto const shifts = detail::calc_meshift(params.mesh); + auto const shifts = calc_p3m_mesh_shift(params.mesh); auto const size = n_stop - n_start; /* The influence function grid */ @@ -141,10 +164,12 @@ std::vector grid_influence_function( Utils::Vector3d{{shifts[0u][indices[KX]] * wavevector[0u], shifts[1u][indices[KY]] * wavevector[1u], shifts[2u][indices[KZ]] * wavevector[2u]}}; - g[index] = FloatType(G_opt(params.cao, params.alpha, k, params.a)); + g[index] = FloatType(G_opt(params, k)); } ++index; }); return g; } + +#endif // defined(P3M) diff --git a/src/core/p3m/influence_function_dipolar.hpp b/src/core/p3m/influence_function_dipolar.hpp index 74af200a20..d08a807069 100644 --- a/src/core/p3m/influence_function_dipolar.hpp +++ b/src/core/p3m/influence_function_dipolar.hpp @@ -39,31 +39,54 @@ #include #include -/** Calculate the aliasing sums for the optimal influence function. +/** + * @brief Calculate the aliasing sums for the optimal influence function. + * + * Evaluate the aliasing sums in the numerator and denominator of + * the expression for the optimal influence function, see + * @cite hockney88a eq (8-22), p. 275. + * + * The full equation is: + * @f{align*}{ + * G_{\text{opt}}(\vec{n}, S, \text{cao}) &= + * \displaystyle\frac{1}{\sum_\vec{m} U^2} + * \sum_\vec{m}U^2 + * \displaystyle\frac{\left(\vec{d}_{\text{op}}[\vec{n}](\vec{s}[\vec{n}] + + * \vec{m}\odot\vec{N})\right)^S} + * {\left(\vec{s}[\vec{n}] + \vec{m}\odot\vec{N}\right)^2} + * e^{-\pi^2\left(\alpha\vec{N}\odot\vec{a}\right)^{-2}\left(\vec{s}[\vec{n}] + * +\vec{m}\odot\vec{N}\right)^2} \\ * - * Calculates the aliasing sums in the numerator and denominator of - * the expression for the optimal influence function (see - * @cite hockney88a : 8-22, p. 275). + * U &= \operatorname{det}\left[I_3\cdot\operatorname{sinc}\left( + * \vec{s}[\vec{n}]\oslash{\vec{N}}+\vec{m}\odot\vec{N}\right) + * \right]^{\text{cao}} + * @f} * - * \tparam S order (2 for energy, 3 for forces) - * \param params DP3M parameters - * \param shift shift for a given n-vector - * \param d_op differential operator for a given n-vector - * \return The result of the fraction. + * with @f$ I_3 @f$ the 3x3 identity matrix, @f$ \vec{N} @f$ the global mesh + * size in k-space coordinates, @f$ \vec{a} @f$ the mesh size, @f$ \vec{m} @f$ + * the Brillouin zone coordinates, @f$ \odot @f$ the Hadamard product, + * @f$ \oslash @f$ the Hadamard division. + * + * @tparam S Order of the differential operator (2 for energy, 3 for forces) + * @tparam m Number of Brillouin zones that contribute to the aliasing sums + * + * @param params DP3M parameters + * @param shift shifting integer vector for a specific k-vector + * @param d_op differential operator for a specific k-vector + * @return The optimal influence function for a specific k-vector. */ -template +template double G_opt_dipolar(P3MParameters const ¶ms, Utils::Vector3i const &shift, Utils::Vector3i const &d_op) { - auto constexpr limit = P3M_BRILLOUIN; auto constexpr exp_limit = 30.; - auto constexpr m_start = Utils::Vector3i::broadcast(-limit); - auto constexpr m_stop = Utils::Vector3i::broadcast(limit + 1); + auto constexpr m_start = Utils::Vector3i::broadcast(-m); + auto constexpr m_stop = Utils::Vector3i::broadcast(m + 1); auto const cao = params.cao; auto const mesh = params.mesh[0]; auto const offset = static_cast(shift) / static_cast(mesh); - auto const f2 = Utils::sqr(std::numbers::pi / params.alpha_L); + auto const exp_prefactor = Utils::sqr(std::numbers::pi / params.alpha_L); auto indices = Utils::Vector3i{}; auto nm = Utils::Vector3i{}; auto fnm = Utils::Vector3d{}; @@ -73,14 +96,14 @@ double G_opt_dipolar(P3MParameters const ¶ms, Utils::Vector3i const &shift, for_each_3d( m_start, m_stop, indices, [&]() { - auto const norm_sq = nm.norm2(); - auto const sz = std::pow(Utils::product(fnm), 2 * cao); - auto const exp_term = f2 * norm_sq; + auto const U2 = std::pow(Utils::product(fnm), 2 * cao); + auto const nm2 = nm.norm2(); + auto const exp_term = exp_prefactor * nm2; if (exp_term < exp_limit) { - auto const f3 = sz * std::exp(-exp_term) / norm_sq; + auto const f3 = U2 * std::exp(-exp_term) / nm2; numerator += f3 * Utils::int_pow(d_op * nm); } - denominator += sz; + denominator += U2; }, [&](unsigned dim, int n) { nm[dim] = shift[dim] + n * mesh; @@ -98,14 +121,15 @@ double G_opt_dipolar(P3MParameters const ¶ms, Utils::Vector3i const &shift, * over a regular grid of k vectors, and returns the values as a vector. * * @tparam S Order of the differential operator, e.g. 2 for energy, 3 for force + * @tparam m Number of Brillouin zones that contribute to the aliasing sums * * @param params DP3M parameters - * @param n_start Lower left corner of the grid - * @param n_stop Upper right corner of the grid. + * @param n_start Lower left corner of the grid in k-space. + * @param n_stop Upper right corner of the grid in k-space. * @param inv_box_l Inverse box length * @return Values of the influence function at regular grid points. */ -template +template std::vector grid_influence_function( P3MParameters const ¶ms, Utils::Vector3i const &n_start, Utils::Vector3i const &n_stop, Utils::Vector3d const &inv_box_l) { @@ -124,8 +148,8 @@ std::vector grid_influence_function( auto prefactor = Utils::int_pow<3>(static_cast(params.mesh[0])) * 2. * Utils::int_pow<2>(inv_box_l[0]); - auto const offset = detail::calc_meshift(params.mesh, false)[0]; - auto const d_op = detail::calc_meshift(params.mesh, true)[0]; + auto const offset = calc_p3m_mesh_shift(params.mesh, false)[0]; + auto const d_op = calc_p3m_mesh_shift(params.mesh, true)[0]; auto const half_mesh = params.mesh[0] / 2; auto indices = Utils::Vector3i{}; auto shift_off = Utils::Vector3i{}; @@ -137,8 +161,8 @@ std::vector grid_influence_function( [&]() { if (((indices[0] % half_mesh != 0) or (indices[1] % half_mesh != 0) or (indices[2] % half_mesh != 0))) { - g[index] = FloatType(prefactor * - G_opt_dipolar(params, shift_off, d_op_off)); + g[index] = FloatType( + prefactor * G_opt_dipolar(params, shift_off, d_op_off)); } ++index; }, @@ -150,12 +174,12 @@ std::vector grid_influence_function( return g; } +template inline double G_opt_dipolar_self_energy(P3MParameters const ¶ms, Utils::Vector3i const &shift) { - auto constexpr limit = P3M_BRILLOUIN + 1; - auto constexpr m_start = Utils::Vector3i::broadcast(-limit); - auto constexpr m_stop = Utils::Vector3i::broadcast(limit + 1); + auto constexpr m_start = Utils::Vector3i::broadcast(-m); + auto constexpr m_stop = Utils::Vector3i::broadcast(m + 1); auto const cao = params.cao; auto const mesh = params.mesh[0]; auto const offset = @@ -177,19 +201,21 @@ inline double G_opt_dipolar_self_energy(P3MParameters const ¶ms, /** * @brief Calculate self-energy of the influence function. * + * @tparam m Number of Brillouin zones that contribute to the aliasing sums + * * @param params DP3M parameters - * @param n_start Lower left corner of the grid - * @param n_stop Upper right corner of the grid. + * @param n_start Lower left corner of the grid in k-space. + * @param n_stop Upper right corner of the grid in k-space. * @param g Energies on the grid. * @return Total self-energy. */ -template +template inline double grid_influence_function_self_energy( P3MParameters const ¶ms, Utils::Vector3i const &n_start, Utils::Vector3i const &n_stop, std::vector const &g) { - auto const offset = detail::calc_meshift(params.mesh, false)[0]; - auto const d_op = detail::calc_meshift(params.mesh, true)[0]; + auto const offset = calc_p3m_mesh_shift(params.mesh, false)[0]; + auto const d_op = calc_p3m_mesh_shift(params.mesh, true)[0]; auto const half_mesh = params.mesh[0] / 2; auto indices = Utils::Vector3i{}; auto shift_off = Utils::Vector3i{}; @@ -202,8 +228,8 @@ inline double grid_influence_function_self_energy( [&]() { if (((indices[0] % half_mesh != 0) or (indices[1] % half_mesh != 0) or (indices[2] % half_mesh != 0))) { - auto const U2 = G_opt_dipolar_self_energy(params, shift_off); - energy += double(g[index]) * U2 * d_op_off.norm2(); + auto const U2 = G_opt_dipolar_self_energy(params, shift_off); + energy += static_cast(g[index]) * U2 * d_op_off.norm2(); } ++index; }, diff --git a/src/core/unit_tests/p3m_test.cpp b/src/core/unit_tests/p3m_test.cpp index 3f091d565c..eea67334c0 100644 --- a/src/core/unit_tests/p3m_test.cpp +++ b/src/core/unit_tests/p3m_test.cpp @@ -35,7 +35,7 @@ BOOST_AUTO_TEST_CASE(calc_meshift_false) { std::vector{0, 1, 2, 3, -3, -2, -1}}}; auto const mesh = Utils::Vector3i{{1, 4, 7}}; - auto const val = detail::calc_meshift(mesh, false); + auto const val = calc_p3m_mesh_shift(mesh, false); for (std::size_t i = 0u; i < 3u; ++i) { for (std::size_t j = 0u; j < ref[i].size(); ++j) { @@ -50,7 +50,7 @@ BOOST_AUTO_TEST_CASE(calc_meshift_true) { std::vector{0, 1, 2, 0, -3, -2, -1}}}; auto const mesh = Utils::Vector3i{{1, 4, 7}}; - auto const val = detail::calc_meshift(mesh, true); + auto const val = calc_p3m_mesh_shift(mesh, true); for (std::size_t i = 0u; i < 3u; ++i) { for (std::size_t j = 0u; j < ref[i].size(); ++j) { From 75f0812180c38cc077ce536fe8aa0ebb418e850f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 12 Dec 2024 15:31:46 +0100 Subject: [PATCH 4/4] Skip infinite values in DLC parameter tuning --- src/core/magnetostatics/dlc.cpp | 18 ++++++-- .../electrostatics/CoulombP3M.hpp | 41 +++++++++++-------- .../magnetostatics/DipolarP3M.hpp | 41 +++++++++++-------- testsuite/python/p3m_tuning_exceptions.py | 5 ++- 4 files changed, 66 insertions(+), 39 deletions(-) diff --git a/src/core/magnetostatics/dlc.cpp b/src/core/magnetostatics/dlc.cpp index 15d820d3c8..183564e818 100644 --- a/src/core/magnetostatics/dlc.cpp +++ b/src/core/magnetostatics/dlc.cpp @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -430,15 +431,26 @@ double DipolarLayerCorrection::tune_far_cut() const { } auto constexpr limitkc = 200; + auto constexpr exp_max = +709.8; // for IEEE-compatible double + auto constexpr exp_min = -708.4; // for IEEE-compatible double + auto const log_max = std::log(std::numeric_limits::max()); auto const piarea = std::numbers::pi / (lx * ly); auto const nmp = static_cast(count_magnetic_particles(system)); auto const h = dlc.box_h; auto far_cut = -1.; for (int kc = 1; kc < limitkc; kc++) { auto const gc = kc * 2. * std::numbers::pi / lx; - auto const fa0 = sqrt(9. * exp(+2. * gc * h) * g1_DLC_dip(gc, lz - h) + - 9. * exp(-2. * gc * h) * g1_DLC_dip(gc, lz + h) + - 22. * g1_DLC_dip(gc, lz)); + auto const pref1 = g1_DLC_dip(gc, lz - h); + auto const pref2 = g1_DLC_dip(gc, lz + h); + auto const pref0 = g1_DLC_dip(gc, lz); + auto const exp_term = 2. * gc * h; + auto const log_term1 = exp_term + std::log(9. * pref1); + if (exp_term >= exp_max or log_term1 >= log_max or -exp_term <= exp_min) { + // no solution can be found at larger k values due to overflow/underflow + break; + } + auto const fa0 = std::sqrt(9. * std::exp(+exp_term) * pref1 + + 9. * std::exp(-exp_term) * pref2 + 22. * pref0); auto const fa1 = sqrt(0.125 * piarea) * fa0; auto const fa2 = g2_DLC_dip(gc, lz); auto const de = nmp * mu_max_sq / (4. * (exp(gc * lz) - 1.)) * (fa1 + fa2); diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp index a641a6f625..5560fe7bff 100644 --- a/src/script_interface/electrostatics/CoulombP3M.hpp +++ b/src/script_interface/electrostatics/CoulombP3M.hpp @@ -116,28 +116,35 @@ class CoulombP3M : public Actor, ::CoulombP3M> { m_tune_verbose = get_value(params, "verbose"); m_tune_limits = {std::nullopt, std::nullopt}; if (params.contains("tune_limits")) { - std::vector range; - try { - auto const val = get_value>(params, "tune_limits"); - assert(val.size() == 2u); - range.emplace_back(val[0u]); - range.emplace_back(val[1u]); - } catch (...) { - range = get_value>(params, "tune_limits"); - assert(range.size() == 2u); - } - if (not is_none(range[0u])) { - m_tune_limits.first = get_value(range[0u]); - } - if (not is_none(range[1u])) { - m_tune_limits.second = get_value(range[1u]); + auto const &variant = params.at("tune_limits"); + std::size_t range_length = 0u; + if (is_type>(variant)) { + auto const range = get_value>(variant); + range_length = range.size(); + if (range_length == 2u) { + m_tune_limits = {range[0u], range[1u]}; + } + } else { + auto const range = get_value>(variant); + range_length = range.size(); + if (range_length == 2u) { + if (not is_none(range[0u])) { + m_tune_limits.first = get_value(range[0u]); + } + if (not is_none(range[1u])) { + m_tune_limits.second = get_value(range[1u]); + } + } } context()->parallel_try_catch([&]() { + if (range_length != 2u) { + throw std::invalid_argument("Parameter 'tune_limits' needs 2 values"); + } if (m_tune_limits.first and *m_tune_limits.first <= 0) { - throw std::domain_error("P3M mesh tuning limits: mesh must be > 0"); + throw std::domain_error("Parameter 'tune_limits' must be > 0"); } if (m_tune_limits.second and *m_tune_limits.second <= 0) { - throw std::domain_error("P3M mesh tuning limits: mesh must be > 0"); + throw std::domain_error("Parameter 'tune_limits' must be > 0"); } }); } diff --git a/src/script_interface/magnetostatics/DipolarP3M.hpp b/src/script_interface/magnetostatics/DipolarP3M.hpp index 8559171a27..4436bd58c5 100644 --- a/src/script_interface/magnetostatics/DipolarP3M.hpp +++ b/src/script_interface/magnetostatics/DipolarP3M.hpp @@ -111,28 +111,35 @@ class DipolarP3M : public Actor, ::DipolarP3M> { m_tune_verbose = get_value(params, "verbose"); m_tune_limits = {std::nullopt, std::nullopt}; if (params.contains("tune_limits")) { - std::vector range; - try { - auto const val = get_value>(params, "tune_limits"); - assert(val.size() == 2u); - range.emplace_back(val[0u]); - range.emplace_back(val[1u]); - } catch (...) { - range = get_value>(params, "tune_limits"); - assert(range.size() == 2u); - } - if (not is_none(range[0u])) { - m_tune_limits.first = get_value(range[0u]); - } - if (not is_none(range[1u])) { - m_tune_limits.second = get_value(range[1u]); + auto const &variant = params.at("tune_limits"); + std::size_t range_length = 0u; + if (is_type>(variant)) { + auto const range = get_value>(variant); + range_length = range.size(); + if (range_length == 2u) { + m_tune_limits = {range[0u], range[1u]}; + } + } else { + auto const range = get_value>(variant); + range_length = range.size(); + if (range_length == 2u) { + if (not is_none(range[0u])) { + m_tune_limits.first = get_value(range[0u]); + } + if (not is_none(range[1u])) { + m_tune_limits.second = get_value(range[1u]); + } + } } context()->parallel_try_catch([&]() { + if (range_length != 2u) { + throw std::invalid_argument("Parameter 'tune_limits' needs 2 values"); + } if (m_tune_limits.first and *m_tune_limits.first <= 0) { - throw std::domain_error("P3M mesh tuning limits: mesh must be > 0"); + throw std::domain_error("Parameter 'tune_limits' must be > 0"); } if (m_tune_limits.second and *m_tune_limits.second <= 0) { - throw std::domain_error("P3M mesh tuning limits: mesh must be > 0"); + throw std::domain_error("Parameter 'tune_limits' must be > 0"); } }); } diff --git a/testsuite/python/p3m_tuning_exceptions.py b/testsuite/python/p3m_tuning_exceptions.py index 8ccfa6f569..e7aa4e82dc 100644 --- a/testsuite/python/p3m_tuning_exceptions.py +++ b/testsuite/python/p3m_tuning_exceptions.py @@ -205,8 +205,9 @@ def check_invalid_params(self, container, class_solver, **custom_params): ('alpha', -2.0, "Parameter 'alpha' must be > 0"), ('accuracy', -2.0, "Parameter 'accuracy' must be > 0"), ('mesh', (-1, -1, -1), "Parameter 'mesh' must be > 0"), - ('tune_limits', (-1, 1), "P3M mesh tuning limits: mesh must be > 0"), - ('tune_limits', (1, 0), "P3M mesh tuning limits: mesh must be > 0"), + ('tune_limits', (-1,), "Parameter 'tune_limits' needs 2 values"), + ('tune_limits', (-1, 1), "Parameter 'tune_limits' must be > 0"), + ('tune_limits', (1, 0), "Parameter 'tune_limits' must be > 0"), ('mesh', (2, 2, 2), "Parameter 'cao' cannot be larger than 'mesh'"), ('mesh_off', (-2, 1, 1), "Parameter 'mesh_off' must be >= 0 and <= 1"), ]