diff --git a/src/core/actor/visit_try_catch.hpp b/src/core/actor/visit_try_catch.hpp index 33b85200ae8..cc4feccf6ff 100644 --- a/src/core/actor/visit_try_catch.hpp +++ b/src/core/actor/visit_try_catch.hpp @@ -29,27 +29,21 @@ /** @brief Run a kernel on a variant and queue errors. */ template -bool visit_active_actor_try_catch(Visitor &&visitor, Variant &actor) { - bool failed = false; +void visit_active_actor_try_catch(Visitor &&visitor, Variant &actor) { try { boost::apply_visitor(visitor, actor); } catch (std::runtime_error const &err) { runtimeErrorMsg() << err.what(); - failed = true; } - return failed; } /** @brief Run a kernel on a variant and queue errors. */ template -bool visit_active_actor_try_catch(Visitor &&visitor, +void visit_active_actor_try_catch(Visitor &&visitor, boost::optional &actor) { - bool failed = false; if (actor) { - failed = - visit_active_actor_try_catch(std::forward(visitor), *actor); + visit_active_actor_try_catch(std::forward(visitor), *actor); } - return failed; } #endif diff --git a/src/core/electrostatics/coulomb.cpp b/src/core/electrostatics/coulomb.cpp index 8f7b72a11d9..fce7aa51482 100644 --- a/src/core/electrostatics/coulomb.cpp +++ b/src/core/electrostatics/coulomb.cpp @@ -61,13 +61,11 @@ boost::optional electrostatics_extension; namespace Coulomb { -bool sanity_checks() { - return visit_active_actor_try_catch( - [](auto &actor) { - actor->sanity_checks(); - actor->sanity_checks_charge_neutrality(); - }, - electrostatics_actor); +void sanity_checks() { + if (electrostatics_actor) { + boost::apply_visitor([](auto &actor) { actor->sanity_checks(); }, + *electrostatics_actor); + } } void on_coulomb_change() { diff --git a/src/core/electrostatics/coulomb.hpp b/src/core/electrostatics/coulomb.hpp index e7d66f14e28..ff6395f5cba 100644 --- a/src/core/electrostatics/coulomb.hpp +++ b/src/core/electrostatics/coulomb.hpp @@ -138,7 +138,7 @@ void check_charge_neutrality(double relative_tolerance); Utils::Vector9d calc_pressure_long_range(ParticleRange const &particles); -bool sanity_checks(); +void sanity_checks(); double cutoff(Utils::Vector3d const &box_l); void on_observable_calc(); diff --git a/src/core/electrostatics/debye_hueckel.hpp b/src/core/electrostatics/debye_hueckel.hpp index 6f2b86a4934..1994b5e2bf7 100644 --- a/src/core/electrostatics/debye_hueckel.hpp +++ b/src/core/electrostatics/debye_hueckel.hpp @@ -61,7 +61,7 @@ struct DebyeHueckel : public Coulomb::Actor { this->r_cut = r_cut; } - void on_activation() const { sanity_checks_charge_neutrality(); } + void on_activation() const { sanity_checks(); } void on_boxl_change() const {} void on_node_grid_change() const {} void on_periodicity_change() const {} diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp index f3728b64134..16175649cf5 100644 --- a/src/core/electrostatics/p3m.cpp +++ b/src/core/electrostatics/p3m.cpp @@ -33,6 +33,7 @@ #include "electrostatics/p3m_gpu.hpp" #include "electrostatics/p3m_gpu_error.hpp" +#include "p3m/TuningAlgorithm.hpp" #include "p3m/TuningLogger.hpp" #include "p3m/fft.hpp" #include "p3m/influence_function.hpp" @@ -58,10 +59,10 @@ #include #include +#include #include #include #include -#include #include #include @@ -189,7 +190,7 @@ static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh, auto const mesh_i = Utils::hadamard_division(Utils::Vector3d::broadcast(1.), mesh); auto const alpha_L_i = 1. / alpha_L; - double he_q = 0.0; + auto he_q = 0.; for (int nx = -mesh[0] / 2; nx < mesh[0] / 2; nx++) { auto const ctan_x = p3m_analytic_cotangent_sum(nx, mesh_i[0], cao); @@ -396,30 +397,30 @@ Utils::Vector9d CoulombP3M::p3m_calc_kspace_pressure_tensor() { double diagonal = 0; int ind = 0; int j[3]; - auto const half_alpha_inv_sq = Utils::sqr(1.0 / 2.0 / p3m.params.alpha); + auto const half_alpha_inv_sq = Utils::sqr(1. / 2. / p3m.params.alpha); for (j[0] = 0; j[0] < p3m.fft.plan[3].new_mesh[RX]; j[0]++) { for (j[1] = 0; j[1] < p3m.fft.plan[3].new_mesh[RY]; j[1]++) { for (j[2] = 0; j[2] < p3m.fft.plan[3].new_mesh[RZ]; j[2]++) { - auto const kx = 2.0 * Utils::pi() * + auto const kx = 2. * Utils::pi() * p3m.d_op[RX][j[KX] + p3m.fft.plan[3].start[KX]] * box_geo.length_inv()[RX]; - auto const ky = 2.0 * Utils::pi() * + auto const ky = 2. * Utils::pi() * p3m.d_op[RY][j[KY] + p3m.fft.plan[3].start[KY]] * box_geo.length_inv()[RY]; - auto const kz = 2.0 * Utils::pi() * + auto const kz = 2. * Utils::pi() * p3m.d_op[RZ][j[KZ] + p3m.fft.plan[3].start[KZ]] * box_geo.length_inv()[RZ]; auto const sqk = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz); auto const node_k_space_energy = (sqk == 0) - ? 0.0 + ? 0. : p3m.g_energy[ind] * (Utils::sqr(p3m.rs_mesh[2 * ind]) + Utils::sqr(p3m.rs_mesh[2 * ind + 1])); ind++; auto const vterm = - (sqk == 0) ? 0. : -2.0 * (1 / sqk + half_alpha_inv_sq); + (sqk == 0) ? 0. : -2. * (1. / sqk + half_alpha_inv_sq); diagonal += node_k_space_energy; auto const prefactor = node_k_space_energy * vterm; @@ -546,495 +547,160 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag, return prefactor * energy; } - return 0.0; + return 0.; } -/** Get the minimal error for this combination of parameters. - * - * The real space error is tuned such that it contributes half of the - * total error, and then the Fourier space error is calculated. - * If an optimal alpha is not found, the value 0.1 is used as fallback. - * @param[in,out] p3m P3M state - * @param[in] prefactor Electrostatics prefactor - * @param[in] mesh @copybrief P3MParameters::mesh - * @param[in] cao @copybrief P3MParameters::cao - * @param[in] r_cut_iL @copybrief P3MParameters::r_cut_iL - * @param[out] _alpha_L @copybrief P3MParameters::alpha_L - * @param[out] _rs_err real-space error - * @param[out] _ks_err k-space error - * @returns Error magnitude - */ -static double p3m_get_accuracy(p3m_data_struct const &p3m, double prefactor, - Utils::Vector3i const &mesh, int cao, - double r_cut_iL, double *_alpha_L, - double *_rs_err, double *_ks_err) { - double rs_err, ks_err; - double alpha_L; - - /* calc maximal real space error for setting */ - rs_err = - p3m_real_space_error(prefactor, r_cut_iL, p3m.sum_qpart, p3m.sum_q2, 0); - - if (Utils::sqrt_2() * rs_err > p3m.params.accuracy) { - /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */ - alpha_L = - sqrt(log(Utils::sqrt_2() * rs_err / p3m.params.accuracy)) / r_cut_iL; - } else { - /* even alpha=0 is ok, however, we cannot choose it since it kills the - k-space error formula. - Anyways, this very likely NOT the optimal solution */ - alpha_L = 0.1; - } - - *_alpha_L = alpha_L; - /* calculate real space and k-space error for this alpha_L */ - rs_err = p3m_real_space_error(prefactor, r_cut_iL, p3m.sum_qpart, p3m.sum_q2, - alpha_L); -#ifdef CUDA - if (has_actor_of_type(electrostatics_actor)) { - ks_err = p3mgpu_k_space_error(prefactor, mesh, cao, p3m.sum_qpart, - p3m.sum_q2, alpha_L); - } else -#endif - ks_err = p3m_k_space_error(prefactor, mesh, cao, p3m.sum_qpart, p3m.sum_q2, - alpha_L); - - *_rs_err = rs_err; - *_ks_err = ks_err; - return sqrt(Utils::sqr(rs_err) + Utils::sqr(ks_err)); -} - -/** Get the computation time for some @p mesh, @p cao, @p r_cut and @p alpha. - * - * @param[in,out] p3m P3M state - * @param[in] mesh @copybrief P3MParameters::mesh - * @param[in] cao @copybrief P3MParameters::cao - * @param[in] r_cut_iL @copybrief P3MParameters::r_cut_iL - * @param[in] alpha_L @copybrief P3MParameters::alpha_L - * @param[in] timings Number of test force calculations - * - * @returns The integration time in case of success, otherwise throw - */ -static double p3m_mcr_time(p3m_data_struct &p3m, Utils::Vector3i const &mesh, - int cao, double r_cut_iL, double alpha_L, - int timings) { - - /* commit p3m parameters for test run */ - p3m.params.r_cut = r_cut_iL * box_geo.length()[0]; - p3m.params.r_cut_iL = r_cut_iL; - p3m.params.cao = cao; - p3m.params.alpha_L = alpha_L; - p3m.params.alpha = alpha_L * box_geo.length_inv()[0]; - p3m.params.mesh = mesh; - - on_coulomb_change(); - - /* perform force calculation test */ - return benchmark_integration_step(timings); -} - -/** Get the optimal alpha and the corresponding computation time for a fixed - * @p mesh and @p cao. - * - * The @p _r_cut_iL is determined via a simple bisection. - * - * @param[in,out] p3m P3M state - * @param[in] prefactor Electrostatics prefactor - * @param[in] mesh @copybrief P3MParameters::mesh - * @param[in] cao @copybrief P3MParameters::cao - * @param[in] r_cut_iL_min lower bound for @p _r_cut_iL - * @param[in] r_cut_iL_max upper bound for @p _r_cut_iL - * @param[out] _r_cut_iL @copybrief P3MParameters::r_cut_iL - * @param[out] _alpha_L @copybrief P3MParameters::alpha_L - * @param[out] _accuracy @copybrief P3MParameters::accuracy - * @param[in] timings Number of test force calculations - * @param[in,out] logger Logging instance - * - * @returns The integration time in case of success, otherwise - * -@ref P3M_TUNE_ACCURACY_TOO_LARGE, - * -@ref P3M_TUNE_CAO_TOO_LARGE, or -@ref P3M_TUNE_ELCTEST - */ -static double p3m_mc_time(p3m_data_struct &p3m, double prefactor, - Utils::Vector3i const &mesh, int cao, - double r_cut_iL_min, double r_cut_iL_max, - double *_r_cut_iL, double *_alpha_L, - double *_accuracy, int timings, - TuningLogger &logger) { - double rs_err, ks_err; - - /* initial checks. */ - auto const k_cut_per_dir = (static_cast(cao) / 2.) * - Utils::hadamard_division(box_geo.length(), mesh); - auto const k_cut = *boost::min_element(k_cut_per_dir); - auto const min_box_l = *boost::min_element(box_geo.length()); - auto const min_local_box_l = *boost::min_element(local_geo.length()); - auto const k_cut_max = std::min(min_box_l, min_local_box_l) - skin; - - if (cao >= *boost::min_element(mesh) or k_cut >= k_cut_max) { - logger.log_cao_too_large(mesh[0], cao); - return -P3M_TUNE_CAO_TOO_LARGE; - } +class CoulombTuningAlgorithm : public TuningAlgorithm { + p3m_data_struct &p3m; + double m_mesh_density_min = -1., m_mesh_density_max = -1.; + // indicates if mesh should be tuned + bool m_tune_mesh = false; - *_accuracy = p3m_get_accuracy(p3m, prefactor, mesh, cao, r_cut_iL_max, - _alpha_L, &rs_err, &ks_err); - - /* Either low and high boundary are equal (for fixed cut), or the low border - is initially 0 and therefore - has infinite error estimate, as required. Therefore if the high boundary - fails, there is no possible r_cut */ - if (*_accuracy > p3m.params.accuracy) { - logger.log_skip("accuracy not achieved", mesh[0], cao, r_cut_iL_max, - *_alpha_L, *_accuracy, rs_err, ks_err); - return -P3M_TUNE_ACCURACY_TOO_LARGE; - } - - double r_cut_iL; - for (;;) { - r_cut_iL = 0.5 * (r_cut_iL_min + r_cut_iL_max); +public: + CoulombTuningAlgorithm(p3m_data_struct &input_p3m, double prefactor, + int timings) + : TuningAlgorithm{prefactor, timings}, p3m{input_p3m} {} - if (r_cut_iL_max - r_cut_iL_min < P3M_RCUT_PREC) - break; + P3MParameters &get_params() override { return p3m.params; } - /* bisection */ - auto const accuracy = p3m_get_accuracy(p3m, prefactor, mesh, cao, r_cut_iL, - _alpha_L, &rs_err, &ks_err); - if (accuracy > p3m.params.accuracy) - r_cut_iL_min = r_cut_iL; - else - r_cut_iL_max = r_cut_iL; - } + void on_solver_change() const override { on_coulomb_change(); } - /* final result is always the upper interval boundary, since only there - we know that the desired minimal accuracy is obtained */ - *_r_cut_iL = r_cut_iL = r_cut_iL_max; - - /* if we are running P3M+ELC, check that r_cut is compatible */ - if (auto elc_actor = get_actor_by_type( - electrostatics_actor)) { - auto const r_cut = r_cut_iL * box_geo.length()[0]; - auto const veto = elc_actor->veto_r_cut(r_cut); - if (not veto.empty()) { - logger.log_skip(veto, mesh[0], cao, r_cut_iL, *_alpha_L, *_accuracy, - rs_err, ks_err); - return -P3M_TUNE_ELCTEST; + void setup_logger(bool verbose) { +#ifdef CUDA + auto const on_gpu = has_actor_of_type(electrostatics_actor); +#else + auto const on_gpu = false; +#endif + m_logger = std::make_unique( + verbose and this_node == 0, (on_gpu) ? "CoulombP3MGPU" : "CoulombP3M", + TuningLogger::Mode::Coulomb); + m_logger->tuning_goals(p3m.params.accuracy, m_prefactor, + box_geo.length()[0], p3m.sum_qpart, p3m.sum_q2); + m_logger->log_tuning_start(); + } + + std::string layer_correction_veto_r_cut(double r_cut) const override { + if (auto elc_actor = get_actor_by_type( + electrostatics_actor)) { + return elc_actor->veto_r_cut(r_cut); } + return {}; } - auto const int_time = - p3m_mcr_time(p3m, mesh, cao, r_cut_iL, *_alpha_L, timings); - - *_accuracy = p3m_get_accuracy(p3m, prefactor, mesh, cao, r_cut_iL, _alpha_L, - &rs_err, &ks_err); + std::tuple + calculate_accuracy(Utils::Vector3i const &mesh, int cao, + double r_cut_iL) const override { - logger.log_success(int_time, mesh[0], cao, r_cut_iL, *_alpha_L, *_accuracy, - rs_err, ks_err); - logger.increment_n_trials(); - return int_time; -} + double alpha_L, rs_err, ks_err; -/** Get the optimal alpha and the corresponding computation time for a fixed - * @p mesh. - * - * @p _cao should contain an initial guess, which is then adapted by stepping - * up and down. - * - * @param[in,out] p3m P3M state - * @param[in] prefactor Electrostatics prefactor - * @param[in] mesh @copybrief P3MParameters::mesh - * @param[in] cao_min lower bound for @p _cao - * @param[in] cao_max upper bound for @p _cao - * @param[in,out] _cao initial guess for the - * @copybrief P3MParameters::cao - * @param[in] r_cut_iL_min lower bound for @p _r_cut_iL - * @param[in] r_cut_iL_max upper bound for @p _r_cut_iL - * @param[out] _r_cut_iL @copybrief P3MParameters::r_cut_iL - * @param[out] _alpha_L @copybrief P3MParameters::alpha_L - * @param[out] _accuracy @copybrief P3MParameters::accuracy - * @param[in] timings Number of test force calculations - * @param[in,out] logger Logging instance - * - * @returns The integration time in case of success, otherwise - * -@ref P3M_TUNE_CAO_TOO_LARGE - */ -static double p3m_m_time(p3m_data_struct &p3m, double prefactor, - Utils::Vector3i const &mesh, int cao_min, int cao_max, - int *_cao, double r_cut_iL_min, double r_cut_iL_max, - double *_r_cut_iL, double *_alpha_L, double *_accuracy, - int timings, TuningLogger &logger) { - double best_time = -1., tmp_time, tmp_r_cut_iL = 0., tmp_alpha_L = 0., - tmp_accuracy = 0.; - /* in which direction improvement is possible. Initially, we don't know it - * yet. */ - int final_dir = 0; - int cao = *_cao; - - /* the initial step sets a timing mark. If there is no valid r_cut, we can - * only try to increase cao to increase the obtainable precision of the far - * formula. */ - do { - tmp_time = p3m_mc_time(p3m, prefactor, mesh, cao, r_cut_iL_min, - r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, - &tmp_accuracy, timings, logger); - /* cao is too large for this grid, but still the accuracy cannot be - * achieved, give up */ - if (tmp_time == -P3M_TUNE_CAO_TOO_LARGE) { - return tmp_time; - } - /* we have a valid time, start optimising from there */ - if (tmp_time >= 0.) { - best_time = tmp_time; - *_r_cut_iL = tmp_r_cut_iL; - *_alpha_L = tmp_alpha_L; - *_accuracy = tmp_accuracy; - *_cao = cao; - break; - } - /* the required accuracy could not be obtained, try higher caos */ - cao++; - final_dir = 1; - } while (cao <= cao_max); - /* with this mesh, the required accuracy cannot be obtained. */ - if (cao > cao_max) - return -P3M_TUNE_CAO_TOO_LARGE; - - /* at the boundaries, only the opposite direction can be used for - * optimisation - */ - if (cao == cao_min) - final_dir = 1; - else if (cao == cao_max) - final_dir = -1; - - if (final_dir == 0) { - /* check in which direction we can optimise. Both directions are possible - */ - double dir_times[3]; - for (final_dir = -1; final_dir <= 1; final_dir += 2) { - dir_times[final_dir + 1] = tmp_time = p3m_mc_time( - p3m, prefactor, mesh, cao + final_dir, r_cut_iL_min, r_cut_iL_max, - &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy, timings, logger); - /* in this direction, we cannot optimise, since we get into precision - * trouble */ - if (tmp_time < 0.) - continue; + /* calc maximal real space error for setting */ + rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart, + p3m.sum_q2, 0.); - if (tmp_time < best_time) { - best_time = tmp_time; - *_r_cut_iL = tmp_r_cut_iL; - *_alpha_L = tmp_alpha_L; - *_accuracy = tmp_accuracy; - *_cao = cao + final_dir; - } - } - /* choose the direction which was optimal, if any of the two */ - if (dir_times[0] == best_time) { - final_dir = -1; - } else if (dir_times[2] == best_time) { - final_dir = 1; + if (Utils::sqrt_2() * rs_err > p3m.params.accuracy) { + /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */ + alpha_L = + sqrt(log(Utils::sqrt_2() * rs_err / p3m.params.accuracy)) / r_cut_iL; } else { - /* no improvement in either direction, however if one is only marginally - * worse, we can still try */ - /* down is possible and not much worse, while up is either illegal or - * even - * worse */ - if ((dir_times[0] >= 0 && dir_times[0] < best_time + P3M_TIME_GRAN) && - (dir_times[2] < 0 || dir_times[2] > dir_times[0])) - final_dir = -1; - /* same for up */ - else if ((dir_times[2] >= 0 && - dir_times[2] < best_time + P3M_TIME_GRAN) && - (dir_times[0] < 0 || dir_times[0] > dir_times[2])) - final_dir = 1; - else { - /* really no chance for optimisation */ - return best_time; - } + /* even alpha=0 is ok, however, we cannot choose it since it kills the + k-space error formula. + Anyways, this very likely NOT the optimal solution */ + alpha_L = 0.1; } - /* we already checked the initial cao and its neighbor */ - cao += 2 * final_dir; - } else { - /* here some constraint is active, and we only checked the initial cao - * itself */ - cao += final_dir; - } - - /* move cao into the optimisation direction until we do not gain anymore. */ - for (; cao >= cao_min && cao <= cao_max; cao += final_dir) { - tmp_time = p3m_mc_time(p3m, prefactor, mesh, cao, r_cut_iL_min, - r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, - &tmp_accuracy, timings, logger); - /* if we cannot meet the precision anymore, give up */ - if (tmp_time < 0.) - break; - - if (tmp_time < best_time) { - best_time = tmp_time; - *_r_cut_iL = tmp_r_cut_iL; - *_alpha_L = tmp_alpha_L; - *_accuracy = tmp_accuracy; - *_cao = cao; - } else if (tmp_time > best_time + P3M_TIME_GRAN) { - /* no hope of further optimisation */ - break; - } - } - return best_time; -} - -void CoulombP3M::adaptive_tune() { - auto constexpr max_n_consecutive_trials = 20; - auto constexpr time_sentinel = std::numeric_limits::max(); - double r_cut_iL_min, r_cut_iL_max, r_cut_iL = -1., tmp_r_cut_iL = 0.; - int cao_min, cao_max, cao = -1, tmp_cao; - double alpha_L = -1., tmp_alpha_L = 0.; - double accuracy = -1., tmp_accuracy = 0.; - double time_best = time_sentinel; - double mesh_density_min, mesh_density_max; - bool tune_mesh = false; // indicates if mesh should be tuned - - sanity_checks_boxl(); - sanity_checks_cell_structure(); - - /* preparation */ - count_charged_particles(); - - if (p3m.sum_qpart == 0) { - throw std::runtime_error("CoulombP3M: no charged particles in the system"); - } + /* calculate real-space and k-space error for this alpha_L */ + rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart, + p3m.sum_q2, alpha_L); #ifdef CUDA - auto const on_gpu = has_actor_of_type(electrostatics_actor); -#else - auto const on_gpu = false; + if (has_actor_of_type(electrostatics_actor)) { + ks_err = p3mgpu_k_space_error(m_prefactor, mesh, cao, p3m.sum_qpart, + p3m.sum_q2, alpha_L); + } else #endif - auto logger = TuningLogger{tune_verbose and this_node == 0, - (on_gpu) ? "CoulombP3MGPU" : "CoulombP3M", - TuningLogger::Mode::Coulomb}; - logger.tuning_goals(p3m.params.accuracy, prefactor, box_geo.length()[0], - p3m.sum_qpart, p3m.sum_q2); - - /* Activate tuning mode */ - p3m.params.tuning = true; - - /* parameter ranges */ - if (p3m.params.mesh == Utils::Vector3i::broadcast(-1)) { - /* simple heuristic to limit the tried meshes if the accuracy cannot - be obtained with smaller meshes, but normally not all these - meshes have to be tested */ - mesh_density_min = - std::cbrt(static_cast(p3m.sum_qpart) / box_geo.volume()); - /* avoid using more than 1 GB of FFT arrays */ - mesh_density_max = 512. / std::cbrt(box_geo.volume()); - tune_mesh = true; - } else if (p3m.params.mesh[1] == -1 and p3m.params.mesh[2] == -1) { - auto const mesh_density = mesh_density_min = mesh_density_max = - static_cast(p3m.params.mesh[0]) * box_geo.length_inv()[0]; - for (int i : {1, 2}) { - p3m.params.mesh[i] = - static_cast(std::round(mesh_density * box_geo.length()[i])); - // Make sure that the mesh is even in all directions - p3m.params.mesh[i] += p3m.params.mesh[i] % 2; - } - logger.fixed_mesh(p3m.params.mesh); - } else { - mesh_density_min = mesh_density_max = - p3m.params.mesh[0] * box_geo.length_inv()[0]; - logger.fixed_mesh(p3m.params.mesh); - } + ks_err = p3m_k_space_error(m_prefactor, mesh, cao, p3m.sum_qpart, + p3m.sum_q2, alpha_L); - if (p3m.params.r_cut_iL == 0.) { - auto const min_box_l = *boost::min_element(box_geo.length()); - auto const min_local_box_l = *boost::min_element(local_geo.length()); - - r_cut_iL_min = 0; - r_cut_iL_max = std::min(min_local_box_l, min_box_l / 2.) - skin; - r_cut_iL_min *= box_geo.length_inv()[0]; - r_cut_iL_max *= box_geo.length_inv()[0]; - } else { - r_cut_iL_min = r_cut_iL_max = p3m.params.r_cut_iL; - logger.fixed_r_cut_iL(p3m.params.r_cut_iL); + return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L}; } - if (p3m.params.cao == -1) { - cao_min = 1; - cao_max = 7; - cao = cao_max; - } else { - cao_min = cao_max = cao = p3m.params.cao; - logger.fixed_cao(p3m.params.cao); - } + void determine_mesh_limits() override { + auto const mesh_density = + static_cast(p3m.params.mesh[0]) * box_geo.length_inv()[0]; - logger.log_tuning_start(); - - /* mesh loop */ - /* we're tuning the density of mesh points, which is the same in every - * direction. */ - Utils::Vector3i mesh = {0, 0, 0}; - for (auto mesh_density = mesh_density_min; mesh_density <= mesh_density_max; - mesh_density += 0.1) { - tmp_cao = cao; - - Utils::Vector3i tmp_mesh; - if (tune_mesh) { - for (int i : {0, 1, 2}) { - tmp_mesh[i] = - static_cast(std::round(box_geo.length()[i] * mesh_density)); - // Make sure that the mesh is even in all directions - tmp_mesh[i] += tmp_mesh[i] % 2; - } + if (p3m.params.mesh == Utils::Vector3i::broadcast(-1)) { + /* avoid using more than 1 GB of FFT arrays */ + auto const normalized_box_dim = std::cbrt(box_geo.volume()); + auto const max_npart_per_dim = 512.; + /* simple heuristic to limit the tried meshes if the accuracy cannot + be obtained with smaller meshes, but normally not all these + meshes have to be tested */ + auto const min_npart_per_dim = std::min( + max_npart_per_dim, std::cbrt(static_cast(p3m.sum_qpart))); + m_mesh_density_min = min_npart_per_dim / normalized_box_dim; + m_mesh_density_max = max_npart_per_dim / normalized_box_dim; + m_tune_mesh = true; } else { - tmp_mesh = p3m.params.mesh; + m_mesh_density_min = m_mesh_density_max = mesh_density; + assert(p3m.params.mesh[0] >= 1); + if (p3m.params.mesh[1] == -1 and p3m.params.mesh[2] == -1) { + // determine the two missing values by rescaling by the box length + for (int i : {1, 2}) { + p3m.params.mesh[i] = + static_cast(std::round(mesh_density * box_geo.length()[i])); + // make the mesh even in all directions + p3m.params.mesh[i] += p3m.params.mesh[i] % 2; + } + } + m_logger->report_fixed_mesh(p3m.params.mesh); } + } - auto const tmp_time = - p3m_m_time(p3m, prefactor, tmp_mesh, cao_min, cao_max, &tmp_cao, - r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, &tmp_alpha_L, - &tmp_accuracy, tune_timings, logger); + TuningAlgorithm::Parameters get_time() override { + auto tuned_params = TuningAlgorithm::Parameters{}; + auto time_best = time_sentinel; + for (auto mesh_density = m_mesh_density_min; + mesh_density <= m_mesh_density_max; mesh_density += 0.1) { + auto trial_params = TuningAlgorithm::Parameters{}; + if (m_tune_mesh) { + for (int i : {0, 1, 2}) { + trial_params.mesh[i] = + static_cast(std::round(box_geo.length()[i] * mesh_density)); + // make the mesh even in all directions + trial_params.mesh[i] += trial_params.mesh[i] % 2; + } + } else { + trial_params.mesh = p3m.params.mesh; + } + trial_params.cao = cao_best; - /* this mesh does not work at all */ - if (tmp_time < 0.) - continue; + auto const trial_time = + get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL, + trial_params.alpha_L, trial_params.accuracy); - /* the optimum r_cut for this mesh is the upper limit for higher meshes, - everything else is slower */ - if (has_actor_of_type(electrostatics_actor)) { - r_cut_iL_max = tmp_r_cut_iL; - } + /* this mesh does not work at all */ + if (trial_time < 0.) + continue; - if (tmp_time < time_best) { - /* new optimum */ - logger.reset_n_trials(); - time_best = tmp_time; - cao = tmp_cao; - r_cut_iL = tmp_r_cut_iL; - alpha_L = tmp_alpha_L; - accuracy = tmp_accuracy; - mesh = tmp_mesh; - } else if (tmp_time > time_best + P3M_TIME_GRAN or - logger.get_n_trials() > max_n_consecutive_trials) { - /* no hope of further optimisation */ - break; - } - } + /* the optimum r_cut for this mesh is the upper limit for higher meshes, + everything else is slower */ + if (has_actor_of_type(electrostatics_actor)) { + m_r_cut_iL_max = trial_params.r_cut_iL; + } - if (time_best == time_sentinel) { - throw std::runtime_error("CoulombP3M: failed to reach requested accuracy"); + if (trial_time < time_best) { + /* new optimum */ + reset_n_trials(); + tuned_params = trial_params; + time_best = tuned_params.time = trial_time; + } else if (trial_time > time_best + time_granularity or + get_n_trials() > max_n_consecutive_trials) { + /* no hope of further optimisation */ + break; + } + } + return tuned_params; } - - /* set tuned p3m parameters */ - p3m.params.tuning = false; - p3m.params.r_cut = r_cut_iL * box_geo.length()[0]; - p3m.params.r_cut_iL = r_cut_iL; - p3m.params.cao = cao; - p3m.params.alpha_L = alpha_L; - p3m.params.alpha = alpha_L * box_geo.length_inv()[0]; - p3m.params.accuracy = accuracy; - p3m.params.mesh = mesh; - - logger.tuning_results(mesh, cao, r_cut_iL, alpha_L, accuracy, time_best); - m_is_tuned = true; - on_coulomb_change(); -} +}; void CoulombP3M::tune() { if (p3m.params.alpha_L == 0. and p3m.params.alpha != 0.) { @@ -1044,8 +710,22 @@ void CoulombP3M::tune() { p3m.params.r_cut_iL = p3m.params.r_cut * box_geo.length_inv()[0]; } if (not is_tuned()) { + count_charged_particles(); + if (p3m.sum_qpart == 0) { + throw std::runtime_error( + "CoulombP3M: no charged particles in the system"); + } try { - adaptive_tune(); + CoulombTuningAlgorithm parameters(p3m, prefactor, tune_timings); + parameters.setup_logger(tune_verbose); + // parameter ranges + parameters.determine_mesh_limits(); + parameters.determine_r_cut_limits(); + parameters.determine_cao_limits(7); + // run tuning algorithm + parameters.tune(); + m_is_tuned = true; + on_coulomb_change(); } catch (...) { p3m.params.tuning = false; throw; @@ -1101,7 +781,6 @@ void CoulombP3M::sanity_checks_node_grid() const { throw std::runtime_error( "CoulombP3M: node grid must be sorted, largest first"); } - sanity_checks_boxl(); } void CoulombP3M::scaleby_box_l() { diff --git a/src/core/electrostatics/p3m.hpp b/src/core/electrostatics/p3m.hpp index bc31946deea..4f9548e136c 100644 --- a/src/core/electrostatics/p3m.hpp +++ b/src/core/electrostatics/p3m.hpp @@ -119,6 +119,7 @@ struct CoulombP3M : public Coulomb::Actor { init(); } void sanity_checks() const { + sanity_checks_boxl(); sanity_checks_node_grid(); sanity_checks_periodicity(); sanity_checks_cell_structure(); @@ -243,7 +244,6 @@ struct CoulombP3M : public Coulomb::Actor { void sanity_checks_periodicity() const; void sanity_checks_cell_structure() const; - void adaptive_tune(); void scaleby_box_l(); }; diff --git a/src/core/electrostatics/scafacos.hpp b/src/core/electrostatics/scafacos.hpp index 89e56016765..47db47a19d4 100644 --- a/src/core/electrostatics/scafacos.hpp +++ b/src/core/electrostatics/scafacos.hpp @@ -41,7 +41,7 @@ struct CoulombScafacos : virtual public ScafacosContextBase, void on_activation() { update_system_params(); - sanity_checks_charge_neutrality(); + sanity_checks(); tune(); } /** @brief Recalculate all box-length-dependent parameters. */ @@ -51,7 +51,7 @@ struct CoulombScafacos : virtual public ScafacosContextBase, void on_cell_structure_change() const {} void init() const {} - void sanity_checks() const override {} + void sanity_checks() const override { sanity_checks_charge_neutrality(); } bool is_tuned() const { return m_is_tuned; } void tune() { diff --git a/src/core/energy.cpp b/src/core/energy.cpp index 55c738f5e51..292831ae1a6 100644 --- a/src/core/energy.cpp +++ b/src/core/energy.cpp @@ -39,14 +39,17 @@ #include "electrostatics/coulomb.hpp" #include "magnetostatics/dipoles.hpp" +#include + #include static std::shared_ptr calculate_energy_local() { auto obs_energy_ptr = std::make_shared(1); - if (long_range_interactions_sanity_checks()) + if (long_range_interactions_sanity_checks()) { return obs_energy_ptr; + } auto &obs_energy = *obs_energy_ptr; diff --git a/src/core/event.cpp b/src/core/event.cpp index dfa6202cfff..c878e75e566 100644 --- a/src/core/event.cpp +++ b/src/core/event.cpp @@ -111,9 +111,7 @@ void on_integration_start(double time_step) { #ifdef NPT integrator_npt_sanity_checks(); #endif - if (long_range_interactions_sanity_checks()) { - runtimeErrorMsg() << "Long-range interactions returned an error."; - } + long_range_interactions_sanity_checks(); lb_lbfluid_sanity_checks(time_step); /********************************************/ diff --git a/src/core/interactions.cpp b/src/core/interactions.cpp index 1b402b7f1a2..691ee7441ad 100644 --- a/src/core/interactions.cpp +++ b/src/core/interactions.cpp @@ -23,13 +23,14 @@ #include "bonded_interactions/bonded_interaction_data.hpp" #include "collision.hpp" #include "electrostatics/coulomb.hpp" +#include "errorhandling.hpp" #include "event.hpp" #include "grid.hpp" #include "magnetostatics/dipoles.hpp" #include "serialization/IA_parameters.hpp" -#include +#include #include @@ -62,18 +63,18 @@ double maximal_cutoff() { } bool long_range_interactions_sanity_checks() { - /* set to zero if initialization was not successful. */ - bool failed = false; - + try { #ifdef ELECTROSTATICS - failed |= Coulomb::sanity_checks(); -#endif /* ifdef ELECTROSTATICS */ - + Coulomb::sanity_checks(); +#endif #ifdef DIPOLES - failed |= Dipoles::sanity_checks(); -#endif /* ifdef DIPOLES */ - - return failed; + Dipoles::sanity_checks(); +#endif + } catch (std::runtime_error const &err) { + runtimeErrorMsg() << err.what(); + return true; + } + return false; } void mpi_bcast_ia_params_local(int i, int j) { diff --git a/src/core/magnetostatics/dipoles.cpp b/src/core/magnetostatics/dipoles.cpp index 094150b4752..5cdc9655390 100644 --- a/src/core/magnetostatics/dipoles.cpp +++ b/src/core/magnetostatics/dipoles.cpp @@ -47,9 +47,11 @@ boost::optional magnetostatics_actor; namespace Dipoles { -bool sanity_checks() { - return visit_active_actor_try_catch( - [](auto &actor) { actor->sanity_checks(); }, magnetostatics_actor); +void sanity_checks() { + if (magnetostatics_actor) { + boost::apply_visitor([](auto &actor) { actor->sanity_checks(); }, + *magnetostatics_actor); + } } void on_dipoles_change() { diff --git a/src/core/magnetostatics/dipoles.hpp b/src/core/magnetostatics/dipoles.hpp index 640015f0b2c..44a68ea3e4d 100644 --- a/src/core/magnetostatics/dipoles.hpp +++ b/src/core/magnetostatics/dipoles.hpp @@ -94,7 +94,7 @@ using is_solver = std::is_convertible, MagnetostaticsActor>; void calc_pressure_long_range(); -bool sanity_checks(); +void sanity_checks(); double cutoff(Utils::Vector3d const &box_l); void on_observable_calc(); diff --git a/src/core/magnetostatics/dp3m.cpp b/src/core/magnetostatics/dp3m.cpp index 9b8d3d299ab..dcde8cd5073 100644 --- a/src/core/magnetostatics/dp3m.cpp +++ b/src/core/magnetostatics/dp3m.cpp @@ -31,6 +31,7 @@ #include "magnetostatics/dp3m.hpp" +#include "p3m/TuningAlgorithm.hpp" #include "p3m/TuningLogger.hpp" #include "p3m/common.hpp" #include "p3m/fft.hpp" @@ -59,7 +60,6 @@ #include #include -#include #include #include @@ -568,440 +568,113 @@ void DipolarP3M::calc_influence_function_energy() { box_geo.length()); } -/** @copybrief p3m_get_accuracy - * - * The real space error is tuned such that it contributes half of the - * total error, and then the Fourier space error is calculated. - * If an optimal alpha is not found, the value 0.1 is used as fallback. - * @param[in] dp3m P3M state - * @param[in] mesh @copybrief P3MParameters::mesh - * @param[in] cao @copybrief P3MParameters::cao - * @param[in] r_cut_iL @copybrief P3MParameters::r_cut_iL - * @param[out] _alpha_L @copybrief P3MParameters::alpha_L - * @param[out] _rs_err real space error - * @param[out] _ks_err Fourier space error - * @returns Error magnitude - */ -static double dp3m_get_accuracy(dp3m_data_struct const &dp3m, int mesh, int cao, - double r_cut_iL, double *_alpha_L, - double *_rs_err, double *_ks_err) { - double rs_err, ks_err; - double alpha_L; - - /* calc maximal real space error for setting */ - - // Alpha cannot be zero in the dipolar case because real_space formula breaks - // down - rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL, - dp3m.sum_dip_part, dp3m.sum_mu2, 0.001); - - if (Utils::sqrt_2() * rs_err > dp3m.params.accuracy) { - /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */ - alpha_L = dp3m_rtbisection(box_geo.length()[0], r_cut_iL, dp3m.sum_dip_part, - dp3m.sum_mu2, 0.0001 * box_geo.length()[0], - 5. * box_geo.length()[0], 0.0001, - dp3m.params.accuracy); - } - - else - /* even alpha=0 is ok, however, we cannot choose it since it kills the - k-space error formula. - Anyways, this very likely NOT the optimal solution */ - alpha_L = 0.1; - - *_alpha_L = alpha_L; - /* calculate real space and k-space error for this alpha_L */ - - rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL, - dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L); - ks_err = dp3m_k_space_error(box_geo.length()[0], mesh, cao, dp3m.sum_dip_part, - dp3m.sum_mu2, alpha_L); - - *_rs_err = rs_err; - *_ks_err = ks_err; - return sqrt(Utils::sqr(rs_err) + Utils::sqr(ks_err)); -} +class DipolarTuningAlgorithm : public TuningAlgorithm { + dp3m_data_struct &dp3m; + int m_mesh_max, m_mesh_min; -/** @copybrief p3m_mcr_time - * - * @param[in,out] dp3m P3M state - * @param[in] mesh @copybrief P3MParameters::mesh - * @param[in] cao @copybrief P3MParameters::cao - * @param[in] r_cut_iL @copybrief P3MParameters::r_cut_iL - * @param[in] alpha_L @copybrief P3MParameters::alpha_L - * @param[in] timings Number of test force calculations - * - * @returns The integration time in case of success, otherwise throws - */ -static double dp3m_mcr_time(dp3m_data_struct &dp3m, int mesh, int cao, - double r_cut_iL, double alpha_L, int timings) { +public: + DipolarTuningAlgorithm(dp3m_data_struct &input_dp3m, double prefactor, + int timings) + : TuningAlgorithm{prefactor, timings}, dp3m{input_dp3m} {} - /* commit p3m parameters for test run */ - dp3m.params.r_cut = r_cut_iL * box_geo.length()[0]; - dp3m.params.r_cut_iL = r_cut_iL; - dp3m.params.cao = cao; - dp3m.params.alpha_L = alpha_L; - dp3m.params.alpha = alpha_L * box_geo.length_inv()[0]; - dp3m.params.mesh = Utils::Vector3i::broadcast(mesh); + P3MParameters &get_params() override { return dp3m.params; } - on_dipoles_change(); + void on_solver_change() const override { on_dipoles_change(); } - /* perform force calculation test */ - return benchmark_integration_step(timings); -} + std::string layer_correction_veto_r_cut(double) const override { return {}; } -/** @copybrief p3m_mc_time - * - * The @p _r_cut_iL is determined via a simple bisection. - * - * @param[in,out] dp3m P3M state - * @param[in] mesh @copybrief P3MParameters::mesh - * @param[in] cao @copybrief P3MParameters::cao - * @param[in] r_cut_iL_min lower bound for @p _r_cut_iL - * @param[in] r_cut_iL_max upper bound for @p _r_cut_iL - * @param[out] _r_cut_iL @copybrief P3MParameters::r_cut_iL - * @param[out] _alpha_L @copybrief P3MParameters::alpha_L - * @param[out] _accuracy @copybrief P3MParameters::accuracy - * @param[in] timings Number of test force calculations - * @param[in,out] logger Logging instance - * - * @returns The integration time in case of success, otherwise - * -@ref P3M_TUNE_ACCURACY_TOO_LARGE, - * -@ref P3M_TUNE_CAO_TOO_LARGE, -@ref P3M_TUNE_ELCTEST, or - * -@ref P3M_TUNE_CUTOFF_TOO_LARGE - */ -static double dp3m_mc_time(dp3m_data_struct &dp3m, int mesh, int cao, - double r_cut_iL_min, double r_cut_iL_max, - double *_r_cut_iL, double *_alpha_L, - double *_accuracy, int timings, - TuningLogger &logger) { - double r_cut_iL; - double rs_err, ks_err; - - /* initial checks. */ - auto const mesh_size = box_geo.length()[0] / static_cast(mesh); - auto const k_cut = mesh_size * cao / 2.; - - auto const min_box_l = *boost::min_element(box_geo.length()); - auto const min_local_box_l = *boost::min_element(local_geo.length()); - - if (cao >= mesh || k_cut >= std::min(min_box_l, min_local_box_l) - skin) { - logger.log_cao_too_large(mesh, cao); - return -P3M_TUNE_CAO_TOO_LARGE; - } - - /* Either low and high boundary are equal (for fixed cut), or the low border - is initially 0 and therefore - has infinite error estimate, as required. Therefore if the high boundary - fails, there is no possible r_cut */ - *_accuracy = dp3m_get_accuracy(dp3m, mesh, cao, r_cut_iL_max, _alpha_L, - &rs_err, &ks_err); - if (*_accuracy > dp3m.params.accuracy) { - logger.log_skip("accuracy not achieved", mesh, cao, r_cut_iL_max, *_alpha_L, - *_accuracy, rs_err, ks_err); - return -P3M_TUNE_ACCURACY_TOO_LARGE; + void setup_logger(bool verbose) { + m_logger = std::make_unique( + verbose and this_node == 0, "DipolarP3M", TuningLogger::Mode::Dipolar); + m_logger->tuning_goals(dp3m.params.accuracy, m_prefactor, + box_geo.length()[0], dp3m.sum_dip_part, + dp3m.sum_mu2); + m_logger->log_tuning_start(); } - for (;;) { - r_cut_iL = 0.5 * (r_cut_iL_min + r_cut_iL_max); - - if (r_cut_iL_max - r_cut_iL_min < P3M_RCUT_PREC) - break; - - /* bisection */ - auto const tmp_accuracy = dp3m_get_accuracy(dp3m, mesh, cao, r_cut_iL, - _alpha_L, &rs_err, &ks_err); - if (tmp_accuracy > dp3m.params.accuracy) - r_cut_iL_min = r_cut_iL; - else - r_cut_iL_max = r_cut_iL; - } - /* final result is always the upper interval boundary, since only there - we know that the desired minimal accuracy is obtained */ - *_r_cut_iL = r_cut_iL = r_cut_iL_max; + std::tuple + calculate_accuracy(Utils::Vector3i const &mesh, int cao, + double r_cut_iL) const override { - auto const int_time = - dp3m_mcr_time(dp3m, mesh, cao, r_cut_iL, *_alpha_L, timings); + double alpha_L, rs_err, ks_err; - *_accuracy = - dp3m_get_accuracy(dp3m, mesh, cao, r_cut_iL, _alpha_L, &rs_err, &ks_err); + /* calc maximal real space error for setting */ + rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL, + dp3m.sum_dip_part, dp3m.sum_mu2, 0.001); + // alpha cannot be zero for dipoles because real-space formula breaks down - logger.log_success(int_time, mesh, cao, r_cut_iL, *_alpha_L, *_accuracy, - rs_err, ks_err); - logger.increment_n_trials(); - return int_time; -} - -/** @copybrief p3m_m_time - * - * @p _cao should contain an initial guess, which is then adapted by stepping - * up and down. - * - * @param[in,out] dp3m P3M state - * @param[in] mesh @copybrief P3MParameters::mesh - * @param[in] cao_min lower bound for @p _cao - * @param[in] cao_max upper bound for @p _cao - * @param[in,out] _cao initial guess for the - * @copybrief P3MParameters::cao - * @param[in] r_cut_iL_min lower bound for @p _r_cut_iL - * @param[in] r_cut_iL_max upper bound for @p _r_cut_iL - * @param[out] _r_cut_iL @copybrief P3MParameters::r_cut_iL - * @param[out] _alpha_L @copybrief P3MParameters::alpha_L - * @param[out] _accuracy @copybrief P3MParameters::accuracy - * @param[in] timings Number of test force calculations - * @param[in,out] logger Logging instance - * - * @returns The integration time in case of success, otherwise - * -@ref P3M_TUNE_CAO_TOO_LARGE */ -static double dp3m_m_time(dp3m_data_struct &dp3m, int mesh, int cao_min, - int cao_max, int *_cao, double r_cut_iL_min, - double r_cut_iL_max, double *_r_cut_iL, - double *_alpha_L, double *_accuracy, int timings, - TuningLogger &logger) { - double best_time = -1, tmp_r_cut_iL = -1., tmp_alpha_L = 0., - tmp_accuracy = 0.; - /* in which direction improvement is possible. Initially, we don't know it - * yet. */ - int final_dir = 0; - int cao = *_cao; - - /* the initial step sets a timing mark. If there is no valid r_cut, we can - * only try to increase cao to increase the obtainable precision of the far - * formula. */ - double tmp_time; - do { - tmp_time = - dp3m_mc_time(dp3m, mesh, cao, r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, - &tmp_alpha_L, &tmp_accuracy, timings, logger); - /* cao is too large for this grid, but still the accuracy cannot be - * achieved, give up */ - if (tmp_time == -P3M_TUNE_CAO_TOO_LARGE) { - return tmp_time; - } - /* we have a valid time, start optimising from there */ - if (tmp_time >= 0) { - best_time = tmp_time; - *_r_cut_iL = tmp_r_cut_iL; - *_alpha_L = tmp_alpha_L; - *_accuracy = tmp_accuracy; - *_cao = cao; - break; - } - /* the required accuracy could not be obtained, try higher caos */ - cao++; - final_dir = 1; - } while (cao <= cao_max); - /* with this mesh, the required accuracy cannot be obtained. */ - if (cao > cao_max) - return -P3M_TUNE_CAO_TOO_LARGE; - - /* at the boundaries, only the opposite direction can be used for optimisation - */ - if (cao == cao_min) - final_dir = 1; - else if (cao == cao_max) - final_dir = -1; - - if (final_dir == 0) { - /* check in which direction we can optimise. Both directions are possible */ - double dir_times[3]; - for (final_dir = -1; final_dir <= 1; final_dir += 2) { - dir_times[final_dir + 1] = tmp_time = dp3m_mc_time( - dp3m, mesh, cao + final_dir, r_cut_iL_min, r_cut_iL_max, - &tmp_r_cut_iL, &tmp_alpha_L, &tmp_accuracy, timings, logger); - /* in this direction, we cannot optimise, since we get into precision - * trouble */ - if (tmp_time < 0) - continue; - - if (tmp_time < best_time) { - best_time = tmp_time; - *_r_cut_iL = tmp_r_cut_iL; - *_alpha_L = tmp_alpha_L; - *_accuracy = tmp_accuracy; - *_cao = cao + final_dir; - } - } - /* choose the direction which was optimal, if any of the two */ - if (dir_times[0] == best_time) { - final_dir = -1; - } else if (dir_times[2] == best_time) { - final_dir = 1; + if (Utils::sqrt_2() * rs_err > dp3m.params.accuracy) { + /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */ + alpha_L = dp3m_rtbisection( + box_geo.length()[0], r_cut_iL, dp3m.sum_dip_part, dp3m.sum_mu2, + 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001, + dp3m.params.accuracy); } else { - /* no improvement in either direction, however if one is only marginally - * worse, we can still try; down is possible and not much worse, while - * up is either illegal or even worse */ - if ((dir_times[0] >= 0 && dir_times[0] < best_time + P3M_TIME_GRAN) && - (dir_times[2] < 0 || dir_times[2] > dir_times[0])) - final_dir = -1; - /* same for up */ - else if ((dir_times[2] >= 0 && - dir_times[2] < best_time + P3M_TIME_GRAN) && - (dir_times[0] < 0 || dir_times[0] > dir_times[2])) - final_dir = 1; - else { - /* really no chance for optimisation */ - return best_time; - } + /* even alpha=0 is ok, however, we cannot choose it since it kills the + k-space error formula. + Anyways, this very likely NOT the optimal solution */ + alpha_L = 0.1; } - /* we already checked the initial cao and its neighbor */ - cao += 2 * final_dir; - } else { - /* here some constraint is active, and we only checked the initial cao - * itself */ - cao += final_dir; - } - /* move cao into the optimisation direction until we do not gain anymore. */ - for (; cao >= cao_min && cao <= cao_max; cao += final_dir) { - tmp_time = - dp3m_mc_time(dp3m, mesh, cao, r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, - &tmp_alpha_L, &tmp_accuracy, timings, logger); - /* if we cannot meet the precision anymore, give up */ - if (tmp_time < 0) - break; - - if (tmp_time < best_time) { - best_time = tmp_time; - *_r_cut_iL = tmp_r_cut_iL; - *_alpha_L = tmp_alpha_L; - *_accuracy = tmp_accuracy; - *_cao = cao; - } else if (tmp_time > best_time + P3M_TIME_GRAN) { - /* no hope of further optimisation */ - break; + /* calculate real-space and k-space error for this alpha_L */ + rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL, + dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L); + ks_err = dp3m_k_space_error(box_geo.length()[0], mesh[0], cao, + dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L); + + return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L}; + } + + void determine_mesh_limits() override { + if (dp3m.params.mesh[0] == -1) { + /* simple heuristic to limit the tried meshes if the accuracy cannot + be obtained with smaller meshes, but normally not all these + meshes have to be tested */ + auto const expo = std::log(std::cbrt(dp3m.sum_dip_part)) / std::log(2.); + /* Medium-educated guess for the minimal mesh */ + m_mesh_min = static_cast(std::round(std::pow(2., std::floor(expo)))); + /* avoid using more than 1 GB of FFT arrays */ + m_mesh_max = 128; + } else { + m_mesh_min = m_mesh_max = dp3m.params.mesh[0]; + m_logger->report_fixed_mesh(dp3m.params.mesh); } } - return best_time; -} - -void DipolarP3M::adaptive_tune() { - /** Tuning of dipolar P3M. The algorithm basically determines the mesh, cao - * and then the real space cutoff, in this nested order. - * - * For each mesh, the cao optimal for the mesh tested previously is used as - * an initial guess, and the algorithm tries whether increasing or decreasing - * it leads to a better solution. This is efficient, since the optimal cao - * only changes little with the meshes in general. - * - * The real space cutoff for a given mesh and cao is determined via a - * bisection on the error estimate, which determines where the error - * estimate equals the required accuracy. Therefore the smallest possible, - * i.e. fastest real space cutoff is determined. - * - * Both the search over mesh and cao stop to search in a specific direction - * once the computation time is significantly higher than the currently - * known optimum. - */ - auto constexpr max_n_consecutive_trials = 20; - auto constexpr time_sentinel = std::numeric_limits::max(); - int mesh_max, mesh = -1, mesh_min, tmp_mesh; - double r_cut_iL_min, r_cut_iL_max, r_cut_iL = -1, tmp_r_cut_iL = 0.0; - int cao_min, cao_max, cao = -1, tmp_cao; - - double alpha_L = -1, tmp_alpha_L = 0.0; - double accuracy = -1, tmp_accuracy = 0.0; - double time_best = time_sentinel, tmp_time; - - count_magnetic_particles(); - if (dp3m.sum_dip_part == 0) { - throw std::runtime_error("DipolarP3M: no dipolar particles in the system"); - } - auto logger = TuningLogger{tune_verbose and this_node == 0, "DipolarP3M", - TuningLogger::Mode::Dipolar}; - logger.tuning_goals(dp3m.params.accuracy, prefactor, box_geo.length()[0], - dp3m.sum_dip_part, dp3m.sum_mu2); - - /* Activate tuning mode */ - dp3m.params.tuning = true; - - /* parameter ranges */ - if (dp3m.params.mesh[0] == -1) { - /* simple heuristic to limit the tried meshes if the accuracy cannot - be obtained with smaller meshes, but normally not all these - meshes have to be tested */ - auto const expo = std::log(std::cbrt(dp3m.sum_dip_part)) / std::log(2.); - /* Medium-educated guess for the minimal mesh */ - mesh_min = (int)(std::pow(2., (double)((int)expo)) + 0.1); - /* avoid using more than 1 GB of FFT arrays */ - mesh_max = 128; - } else { - mesh_min = mesh_max = dp3m.params.mesh[0]; - logger.fixed_mesh(dp3m.params.mesh); - } + TuningAlgorithm::Parameters get_time() override { + auto tuned_params = TuningAlgorithm::Parameters{}; + auto time_best = time_sentinel; + for (auto tmp_mesh = m_mesh_min; tmp_mesh <= m_mesh_max; tmp_mesh += 2) { + auto trial_params = TuningAlgorithm::Parameters{}; + trial_params.mesh = Utils::Vector3i::broadcast(tmp_mesh); + trial_params.cao = cao_best; - if (dp3m.params.r_cut_iL == 0.) { - auto const min_box_l = *boost::min_element(box_geo.length()); - auto const min_local_box_l = *boost::min_element(local_geo.length()); - - r_cut_iL_min = 0; - r_cut_iL_max = std::min(min_local_box_l, min_box_l / 2) - skin; - r_cut_iL_min *= box_geo.length_inv()[0]; - r_cut_iL_max *= box_geo.length_inv()[0]; - } else { - r_cut_iL_min = r_cut_iL_max = dp3m.params.r_cut_iL; - logger.fixed_r_cut_iL(dp3m.params.r_cut_iL); - } + auto const trial_time = + get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL, + trial_params.alpha_L, trial_params.accuracy); - if (dp3m.params.cao == -1) { - cao_min = 1; - cao_max = 7; - cao = 3; - } else { - cao_min = cao_max = cao = dp3m.params.cao; - logger.fixed_cao(dp3m.params.cao); - } + /* this mesh does not work at all */ + if (trial_time < 0.) + continue; - logger.log_tuning_start(); - - /* mesh loop */ - for (tmp_mesh = mesh_min; tmp_mesh <= mesh_max; tmp_mesh += 2) { - tmp_cao = cao; - tmp_time = dp3m_m_time(dp3m, tmp_mesh, cao_min, cao_max, &tmp_cao, - r_cut_iL_min, r_cut_iL_max, &tmp_r_cut_iL, - &tmp_alpha_L, &tmp_accuracy, tune_timings, logger); - /* this mesh does not work at all */ - if (tmp_time < 0.) - continue; - - /* the optimum r_cut for this mesh is the upper limit for higher meshes, - everything else is slower */ - r_cut_iL_max = tmp_r_cut_iL; - - if (tmp_time < time_best) { - /* new optimum */ - logger.reset_n_trials(); - time_best = tmp_time; - mesh = tmp_mesh; - cao = tmp_cao; - r_cut_iL = tmp_r_cut_iL; - alpha_L = tmp_alpha_L; - accuracy = tmp_accuracy; - } else if (tmp_time > time_best + P3M_TIME_GRAN or - logger.get_n_trials() > max_n_consecutive_trials) { - /* no hope of further optimisation */ - break; + /* the optimum r_cut for this mesh is the upper limit for higher meshes, + everything else is slower */ + m_r_cut_iL_max = trial_params.r_cut_iL; + + if (trial_time < time_best) { + /* new optimum */ + reset_n_trials(); + tuned_params = trial_params; + time_best = tuned_params.time = trial_time; + } else if (trial_time > time_best + time_granularity or + get_n_trials() > max_n_consecutive_trials) { + /* no hope of further optimisation */ + break; + } } + return tuned_params; } - - if (time_best == time_sentinel) { - throw std::runtime_error("DipolarP3M: failed to reach requested accuracy"); - } - - /* set tuned p3m parameters */ - dp3m.params.tuning = false; - dp3m.params.r_cut_iL = r_cut_iL; - dp3m.params.mesh = Utils::Vector3i::broadcast(mesh); - dp3m.params.cao = cao; - dp3m.params.alpha_L = alpha_L; - dp3m.params.accuracy = accuracy; - - logger.tuning_results(dp3m.params.mesh, cao, r_cut_iL, alpha_L, accuracy, - time_best); - m_is_tuned = true; - on_dipoles_change(); -} +}; void DipolarP3M::tune() { if (dp3m.params.alpha_L == 0. and dp3m.params.alpha != 0.) { @@ -1011,8 +684,22 @@ void DipolarP3M::tune() { dp3m.params.r_cut_iL = dp3m.params.r_cut * box_geo.length_inv()[0]; } if (not is_tuned()) { + count_magnetic_particles(); + if (dp3m.sum_dip_part == 0) { + throw std::runtime_error( + "DipolarP3M: no dipolar particles in the system"); + } try { - adaptive_tune(); + DipolarTuningAlgorithm parameters(dp3m, prefactor, tune_timings); + parameters.setup_logger(tune_verbose); + // parameter ranges + parameters.determine_mesh_limits(); + parameters.determine_r_cut_limits(); + parameters.determine_cao_limits(3); + // run tuning algorithm + parameters.tune(); + m_is_tuned = true; + on_dipoles_change(); } catch (...) { dp3m.params.tuning = false; throw; @@ -1181,7 +868,7 @@ void DipolarP3M::sanity_checks_periodicity() const { } } -void DipolarP3M::sanity_checks_cell_struture() const { +void DipolarP3M::sanity_checks_cell_structure() const { if (local_geo.cell_structure_type() != CellStructureType::CELL_STRUCTURE_REGULAR) { throw std::runtime_error( @@ -1194,7 +881,6 @@ void DipolarP3M::sanity_checks_node_grid() const { throw std::runtime_error( "DipolarP3M: node grid must be sorted, largest first"); } - sanity_checks_boxl(); } void DipolarP3M::scaleby_box_l() { diff --git a/src/core/magnetostatics/dp3m.hpp b/src/core/magnetostatics/dp3m.hpp index 12d0db590bd..8a6aff6606a 100644 --- a/src/core/magnetostatics/dp3m.hpp +++ b/src/core/magnetostatics/dp3m.hpp @@ -112,17 +112,17 @@ struct DipolarP3M { void on_node_grid_change() const { sanity_checks_node_grid(); } void on_periodicity_change() const { sanity_checks_periodicity(); } void on_cell_structure_change() { - sanity_checks_cell_struture(); + sanity_checks_cell_structure(); init(); } /** @brief Recalculate all derived parameters. */ void init(); void sanity_checks() const { + sanity_checks_boxl(); sanity_checks_node_grid(); sanity_checks_periodicity(); - sanity_checks_cell_struture(); - sanity_checks_boxl(); + sanity_checks_cell_structure(); } /** @@ -295,9 +295,8 @@ struct DipolarP3M { void sanity_checks_boxl() const; void sanity_checks_node_grid() const; void sanity_checks_periodicity() const; - void sanity_checks_cell_struture() const; + void sanity_checks_cell_structure() const; - void adaptive_tune(); void scaleby_box_l(); }; diff --git a/src/core/p3m/CMakeLists.txt b/src/core/p3m/CMakeLists.txt index c16471e696f..e140b214599 100644 --- a/src/core/p3m/CMakeLists.txt +++ b/src/core/p3m/CMakeLists.txt @@ -19,5 +19,6 @@ target_sources( EspressoCore PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/TuningAlgorithm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/send_mesh.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fft.cpp) diff --git a/src/core/p3m/TuningAlgorithm.cpp b/src/core/p3m/TuningAlgorithm.cpp new file mode 100644 index 00000000000..acdddb075de --- /dev/null +++ b/src/core/p3m/TuningAlgorithm.cpp @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "config.hpp" + +#if defined(P3M) || defined(DP3M) + +#include "p3m/TuningAlgorithm.hpp" +#include "p3m/common.hpp" + +#include "tuning.hpp" + +#include "communication.hpp" +#include "grid.hpp" +#include "integrate.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +/** @name Error codes for tuning. */ +/**@{*/ +/** charge assignment order too large for mesh size */ +static auto constexpr P3M_TUNE_CAO_TOO_LARGE = 1.; +/** conflict with ELC gap size */ +static auto constexpr P3M_TUNE_ELC_GAP_SIZE = 2.; +/** could not achieve target accuracy */ +static auto constexpr P3M_TUNE_ACCURACY_TOO_LARGE = 3.; +/**@}*/ + +/** @brief Precision threshold for a non-zero real-space cutoff. */ +static auto constexpr P3M_RCUT_PREC = 1e-3; + +void TuningAlgorithm::determine_r_cut_limits() { + auto const r_cut_iL = get_params().r_cut_iL; + if (r_cut_iL == 0.) { + auto const min_box_l = *boost::min_element(box_geo.length()); + auto const min_local_box_l = *boost::min_element(local_geo.length()); + m_r_cut_iL_min = 0.; + m_r_cut_iL_max = std::min(min_local_box_l, min_box_l / 2.) - skin; + m_r_cut_iL_min *= box_geo.length_inv()[0]; + m_r_cut_iL_max *= box_geo.length_inv()[0]; + } else { + m_r_cut_iL_min = m_r_cut_iL_max = r_cut_iL; + m_logger->report_fixed_r_cut_iL(r_cut_iL); + } +} + +void TuningAlgorithm::determine_cao_limits(int initial_cao) { + assert(initial_cao >= 1 and initial_cao <= 7); + auto const cao = get_params().cao; + if (cao == -1) { + cao_min = 1; + cao_max = 7; + cao_best = initial_cao; + } else { + cao_min = cao_max = cao_best = cao; + m_logger->report_fixed_cao(cao); + } +} + +void TuningAlgorithm::commit(Utils::Vector3i const &mesh, int cao, + double r_cut_iL, double alpha_L) { + auto &p3m_params = get_params(); + p3m_params.r_cut = r_cut_iL * box_geo.length()[0]; + p3m_params.r_cut_iL = r_cut_iL; + p3m_params.cao = cao; + p3m_params.alpha_L = alpha_L; + p3m_params.alpha = alpha_L * box_geo.length_inv()[0]; + p3m_params.mesh = mesh; +} + +/** + * @brief Get the optimal alpha and the corresponding computation time + * for a fixed @p mesh and @p cao. + * + * The @p tuned_r_cut_iL is determined via a simple bisection. + * + * @param[in] mesh @copybrief P3MParameters::mesh + * @param[in] cao @copybrief P3MParameters::cao + * @param[in,out] tuned_r_cut_iL @copybrief P3MParameters::r_cut_iL + * @param[in,out] tuned_alpha_L @copybrief P3MParameters::alpha_L + * @param[in,out] tuned_accuracy @copybrief P3MParameters::accuracy + * + * @returns The integration time in case of success, otherwise + * -@ref P3M_TUNE_ACCURACY_TOO_LARGE, + * -@ref P3M_TUNE_CAO_TOO_LARGE, or -@ref P3M_TUNE_ELC_GAP_SIZE + */ +double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao, + double &tuned_r_cut_iL, + double &tuned_alpha_L, + double &tuned_accuracy) { + auto const target_accuracy = get_params().accuracy; + double rs_err, ks_err; + double r_cut_iL_min = m_r_cut_iL_min; + double r_cut_iL_max = m_r_cut_iL_max; + + /* initial checks. */ + auto const k_cut_per_dir = (static_cast(cao) / 2.) * + Utils::hadamard_division(box_geo.length(), mesh); + auto const k_cut = *boost::min_element(k_cut_per_dir); + auto const min_box_l = *boost::min_element(box_geo.length()); + auto const min_local_box_l = *boost::min_element(local_geo.length()); + auto const k_cut_max = std::min(min_box_l, min_local_box_l) - skin; + + if (cao >= *boost::min_element(mesh) or k_cut >= k_cut_max) { + m_logger->log_cao_too_large(mesh[0], cao); + return -P3M_TUNE_CAO_TOO_LARGE; + } + + std::tie(tuned_accuracy, rs_err, ks_err, tuned_alpha_L) = + calculate_accuracy(mesh, cao, r_cut_iL_max); + + /* Either low and high boundary are equal (for fixed cut), or the low border + is initially 0 and therefore + has infinite error estimate, as required. Therefore if the high boundary + fails, there is no possible r_cut */ + if (tuned_accuracy > target_accuracy) { + m_logger->log_skip("accuracy not achieved", mesh[0], cao, r_cut_iL_max, + tuned_alpha_L, tuned_accuracy, rs_err, ks_err); + return -P3M_TUNE_ACCURACY_TOO_LARGE; + } + + double r_cut_iL, accuracy; + for (;;) { + r_cut_iL = 0.5 * (r_cut_iL_min + r_cut_iL_max); + + if (r_cut_iL_max - r_cut_iL_min < P3M_RCUT_PREC) + break; + + /* bisection */ + std::tie(accuracy, rs_err, ks_err, tuned_alpha_L) = + calculate_accuracy(mesh, cao, r_cut_iL); + if (accuracy > target_accuracy) + r_cut_iL_min = r_cut_iL; + else + r_cut_iL_max = r_cut_iL; + } + + /* final result is always the upper interval boundary, since only there + * we know that the desired minimal accuracy is obtained */ + tuned_r_cut_iL = r_cut_iL = r_cut_iL_max; + + /* if we are running P3M+ELC, check that r_cut is compatible */ + auto const r_cut = r_cut_iL * box_geo.length()[0]; + auto const veto = layer_correction_veto_r_cut(r_cut); + if (not veto.empty()) { + m_logger->log_skip(veto, mesh[0], cao, r_cut_iL, tuned_alpha_L, + tuned_accuracy, rs_err, ks_err); + return -P3M_TUNE_ELC_GAP_SIZE; + } + + commit(mesh, cao, r_cut_iL, tuned_alpha_L); + on_solver_change(); + auto const int_time = benchmark_integration_step(m_timings); + + std::tie(tuned_accuracy, rs_err, ks_err, tuned_alpha_L) = + calculate_accuracy(mesh, cao, r_cut_iL); + + m_logger->log_success(int_time, mesh[0], cao, r_cut_iL, tuned_alpha_L, + tuned_accuracy, rs_err, ks_err); + increment_n_trials(); + return int_time; +} + +/** + * @brief Get the optimal alpha and the corresponding computation time + * for a fixed @p mesh. + * + * @p _cao should contain an initial guess, which is then adapted by stepping + * up and down. + * + * @param[in] mesh @copybrief P3MParameters::mesh + * @param[in,out] tuned_cao initial guess for the + * @copybrief P3MParameters::cao + * @param[out] tuned_r_cut_iL @copybrief P3MParameters::r_cut_iL + * @param[out] tuned_alpha_L @copybrief P3MParameters::alpha_L + * @param[out] tuned_accuracy @copybrief P3MParameters::accuracy + * + * @returns The integration time in case of success, otherwise + * -@ref P3M_TUNE_CAO_TOO_LARGE + */ +double TuningAlgorithm::get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, + double &tuned_r_cut_iL, + double &tuned_alpha_L, + double &tuned_accuracy) { + double best_time = -1., tmp_r_cut_iL = 0., tmp_alpha_L = 0., + tmp_accuracy = 0.; + /* in which direction improvement is possible. Initially, we don't know it + * yet. */ + int final_dir = 0; + int cao = tuned_cao; + + /* the initial step sets a timing mark. If there is no valid r_cut, we can + * only try to increase cao to increase the obtainable precision of the far + * formula. */ + double tmp_time; + do { + tmp_time = get_mc_time(mesh, cao, tmp_r_cut_iL, tmp_alpha_L, tmp_accuracy); + /* cao is too large for this grid, but still the accuracy cannot be + * achieved, give up */ + if (tmp_time == -P3M_TUNE_CAO_TOO_LARGE) { + return tmp_time; + } + /* we have a valid time, start optimising from there */ + if (tmp_time >= 0.) { + best_time = tmp_time; + tuned_r_cut_iL = tmp_r_cut_iL; + tuned_alpha_L = tmp_alpha_L; + tuned_accuracy = tmp_accuracy; + tuned_cao = cao; + break; + } + /* the required accuracy could not be obtained, try higher caos */ + cao++; + final_dir = 1; + } while (cao <= cao_max); + /* with this mesh, the required accuracy cannot be obtained. */ + if (cao > cao_max) + return -P3M_TUNE_CAO_TOO_LARGE; + + /* at the boundaries, only the opposite direction can be used for + * optimisation + */ + if (cao == cao_min) + final_dir = 1; + else if (cao == cao_max) + final_dir = -1; + + if (final_dir == 0) { + /* check in which direction we can optimise. Both directions are possible */ + double dir_times[3]; + for (final_dir = -1; final_dir <= 1; final_dir += 2) { + dir_times[final_dir + 1] = tmp_time = get_mc_time( + mesh, cao + final_dir, tmp_r_cut_iL, tmp_alpha_L, tmp_accuracy); + /* in this direction, we cannot optimise, since we get into precision + * trouble */ + if (tmp_time < 0.) + continue; + + if (tmp_time < best_time) { + best_time = tmp_time; + tuned_r_cut_iL = tmp_r_cut_iL; + tuned_alpha_L = tmp_alpha_L; + tuned_accuracy = tmp_accuracy; + tuned_cao = cao + final_dir; + } + } + /* choose the direction which was optimal, if any of the two */ + if (dir_times[0] == best_time) { + final_dir = -1; + } else if (dir_times[2] == best_time) { + final_dir = 1; + } else { + /* no improvement in either direction, however if one is only marginally + * worse, we can still try; down is possible and not much worse, while + * up is either illegal or even worse */ + if ((dir_times[0] >= 0 && dir_times[0] < best_time + time_granularity) && + (dir_times[2] < 0 || dir_times[2] > dir_times[0])) + final_dir = -1; + /* same for up */ + else if ((dir_times[2] >= 0 && + dir_times[2] < best_time + time_granularity) && + (dir_times[0] < 0 || dir_times[0] > dir_times[2])) + final_dir = 1; + else { + /* really no chance for optimisation */ + return best_time; + } + } + /* we already checked the initial cao and its neighbor */ + cao += 2 * final_dir; + } else { + /* here some constraint is active, and we only checked the initial cao + * itself */ + cao += final_dir; + } + + /* move cao into the optimisation direction until we do not gain anymore. */ + for (; cao >= cao_min && cao <= cao_max; cao += final_dir) { + tmp_time = get_mc_time(mesh, cao, tmp_r_cut_iL, tmp_alpha_L, tmp_accuracy); + /* if we cannot meet the precision anymore, give up */ + if (tmp_time < 0.) + break; + + if (tmp_time < best_time) { + best_time = tmp_time; + tuned_r_cut_iL = tmp_r_cut_iL; + tuned_alpha_L = tmp_alpha_L; + tuned_accuracy = tmp_accuracy; + tuned_cao = cao; + } else if (tmp_time > best_time + time_granularity) { + /* no hope of further optimisation */ + break; + } + } + return best_time; +} + +#endif // P3M or DP3M diff --git a/src/core/p3m/TuningAlgorithm.hpp b/src/core/p3m/TuningAlgorithm.hpp new file mode 100644 index 00000000000..d27713eac18 --- /dev/null +++ b/src/core/p3m/TuningAlgorithm.hpp @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef ESPRESSO_SRC_CORE_P3M_TUNING_ALGORITHM_HPP +#define ESPRESSO_SRC_CORE_P3M_TUNING_ALGORITHM_HPP + +#include "config.hpp" + +#if defined(P3M) || defined(DP3M) + +#include "p3m/TuningLogger.hpp" +#include "p3m/common.hpp" + +#include + +#include +#include +#include +#include +#include + +/** + * @brief Tuning algorithm for P3M. + * + * The algorithm basically determines the mesh, cao + * and then the real-space cutoff, in this order. + * + * For each mesh, the optimal cao for the previous mesh is re-used as an + * initial guess, and the algorithm checks whether increasing or decreasing + * it leads to a better solution. This is efficient, since the optimal cao + * only changes little with the meshes in general. + * + * The real-space cutoff for a given mesh and cao is determined via a + * bisection on the error estimate, which determines where the error + * estimate equals the required accuracy. Therefore the smallest possible, + * i.e. fastest real-space cutoff is determined. + * + * Both the search over mesh and cao stop to search in a specific + * direction once the computation time is significantly higher + * than the currently known optimum. + */ +class TuningAlgorithm { + int m_timings; + std::size_t m_n_trials; + +protected: + double m_prefactor; + std::unique_ptr m_logger = nullptr; + int cao_min = -1, cao_max = -1, cao_best = -1; + double m_r_cut_iL_min = -1., m_r_cut_iL_max = -1.; + + /** + * @brief Granularity of the time measurement (milliseconds). + * Tuning halts when the runtime is larger than the best time plus this value. + */ + static auto constexpr time_granularity = 2.; + + /** + * @brief Maximal number of consecutive trials that don't improve runtime. + * Tuning halts when this threshold is reached. + */ + static auto constexpr max_n_consecutive_trials = 20; + + /** @brief Value for invalid time measurements. */ + static auto constexpr time_sentinel = std::numeric_limits::max(); + +public: + TuningAlgorithm(double prefactor, int timings) + : m_timings{timings}, m_n_trials{0ul}, m_prefactor{prefactor} {} + virtual ~TuningAlgorithm() {} + + struct Parameters { + Utils::Vector3i mesh = {}; + int cao = -1; + double alpha_L = -1.; + double r_cut_iL = -1.; + double accuracy = -1.; + double time = std::numeric_limits::max(); + }; + + /** @brief Get the P3M parameters. */ + virtual P3MParameters &get_params() = 0; + + /** @brief Re-initialize the currently active solver. */ + virtual void on_solver_change() const = 0; + + /** @brief Tuning loop entry point. */ + virtual TuningAlgorithm::Parameters get_time() = 0; + + /** @brief Configure the logger. */ + virtual void setup_logger(bool verbose) = 0; + + /** @brief Determine a sensible range for the mesh. */ + virtual void determine_mesh_limits() = 0; + + /** @brief Determine a sensible range for the real-space cutoff. */ + void determine_r_cut_limits(); + + /** @brief Determine a sensible range for the charge assignment order. */ + void determine_cao_limits(int initial_cao); + + /** + * @brief Get the minimal error for this combination of parameters. + * + * The real-space error is tuned such that it contributes half of the + * total error, and then the k-space error is calculated. + * If an optimal alpha is not found, the value 0.1 is used as fallback. + * @param[in] mesh @copybrief P3MParameters::mesh + * @param[in] cao @copybrief P3MParameters::cao + * @param[in] r_cut_iL @copybrief P3MParameters::r_cut_iL + * @returns Error magnitude, real-space error, k-space error, + * @copybrief P3MParameters::alpha_L + */ + virtual std::tuple + calculate_accuracy(Utils::Vector3i const &mesh, int cao, + double r_cut_iL) const = 0; + + /** @brief Veto real-space cutoffs larger than the layer correction gap. */ + virtual std::string layer_correction_veto_r_cut(double r_cut) const = 0; + + /** @brief Write tuned parameters to the P3M parameter struct. */ + void commit(Utils::Vector3i const &mesh, int cao, double r_cut_iL, + double alpha_L); + + void tune() { + // activate tuning mode + get_params().tuning = true; + + auto const tuned_params = get_time(); + + // deactivate tuning mode + get_params().tuning = false; + + if (tuned_params.time == time_sentinel) { + throw std::runtime_error(m_logger->get_name() + + ": failed to reach requested accuracy"); + } + // set tuned parameters + get_params().accuracy = tuned_params.accuracy; + commit(tuned_params.mesh, tuned_params.cao, tuned_params.r_cut_iL, + tuned_params.alpha_L); + + m_logger->tuning_results(tuned_params.mesh, tuned_params.cao, + tuned_params.r_cut_iL, tuned_params.alpha_L, + tuned_params.accuracy, tuned_params.time); + } + +protected: + auto get_n_trials() { return m_n_trials; } + void increment_n_trials() { ++m_n_trials; } + void reset_n_trials() { m_n_trials = 0ul; } + double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, + double &tuned_r_cut_iL, double &tuned_alpha_L, + double &tuned_accuracy); + double get_mc_time(Utils::Vector3i const &mesh, int cao, + double &tuned_r_cut_iL, double &tuned_alpha_L, + double &tuned_accuracy); +}; + +#endif // P3M or DP3M + +#endif diff --git a/src/core/p3m/TuningLogger.hpp b/src/core/p3m/TuningLogger.hpp index a4245a156b8..b2b91da1d0b 100644 --- a/src/core/p3m/TuningLogger.hpp +++ b/src/core/p3m/TuningLogger.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2019 The ESPResSo project + * Copyright (C) 2010-2022 The ESPResSo project * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 * Max-Planck-Institute for Polymer Research, Theory Group * @@ -18,8 +18,9 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#ifndef ESPRESSO_CORE_P3M_TUNING_HPP -#define ESPRESSO_CORE_P3M_TUNING_HPP + +#ifndef ESPRESSO_SRC_CORE_P3M_TUNING_LOGGER_HPP +#define ESPRESSO_SRC_CORE_P3M_TUNING_LOGGER_HPP #include "config.hpp" @@ -43,12 +44,6 @@ class TuningLogger { } } - auto get_n_trials() { return n_trials; } - - void increment_n_trials() { ++n_trials; } - - void reset_n_trials() { n_trials = 0; } - template void log_success(double time, Types... parameter_set) const { if (m_verbose) { @@ -104,29 +99,30 @@ class TuningLogger { } } - void fixed_cao(int cao) const { + void report_fixed_cao(int cao) const { if (m_verbose) { std::printf("fixed cao %d\n", cao); } } - void fixed_r_cut_iL(double r_cut_iL) const { + void report_fixed_r_cut_iL(double r_cut_iL) const { if (m_verbose) { std::printf("fixed r_cut_iL %f\n", r_cut_iL); } } - void fixed_mesh(Utils::Vector3i const &mesh) const { + void report_fixed_mesh(Utils::Vector3i const &mesh) const { if (m_verbose) { std::printf("fixed mesh (%d, %d, %d)\n", mesh[0], mesh[1], mesh[2]); } } + auto get_name() const { return m_name; } + private: bool m_verbose; std::string m_name; Mode m_mode; - std::size_t n_trials; void row(int mesh, int cao, double r_cut_iL, double alpha_L, double accuracy, double rs_err, double ks_err) const { diff --git a/src/core/p3m/common.hpp b/src/core/p3m/common.hpp index b8a28eba374..536dd52f3ab 100644 --- a/src/core/p3m/common.hpp +++ b/src/core/p3m/common.hpp @@ -32,8 +32,8 @@ * */ -#ifndef ESPRESSO_CORE_P3M_COMMON_HPP -#define ESPRESSO_CORE_P3M_COMMON_HPP +#ifndef ESPRESSO_SRC_CORE_P3M_COMMON_HPP +#define ESPRESSO_SRC_CORE_P3M_COMMON_HPP #include "config.hpp" @@ -51,21 +51,6 @@ auto constexpr P3M_EPSILON_METALLIC = 0.0; #include #include -/** Error Codes for p3m tuning (version 2) */ -enum P3M_TUNE_ERROR { - /** force evaluation failed */ - P3M_TUNE_FAIL = 1, - /** could not find a valid realspace cutoff radius */ - P3M_TUNE_NOCUTOFF = 2, - /** charge assignment order too large for mesh size */ - P3M_TUNE_CAO_TOO_LARGE = 4, - /** conflict with ELC gap size */ - P3M_TUNE_ELCTEST = 8, - P3M_TUNE_CUTOFF_TOO_LARGE = 16, - /** could not achieve target accuracy */ - P3M_TUNE_ACCURACY_TOO_LARGE = 32 -}; - namespace detail { /** @brief Index helpers for direct and reciprocal space. * After the FFT the data is in order YZX, which @@ -77,15 +62,6 @@ enum FFT_WAVE_VECTOR : int { KY = 0, KZ = 1, KX = 2 }; } // namespace FFT_indexing } // namespace detail -/** precision limit for the r_cut zero */ -#define P3M_RCUT_PREC 1e-3 -/** granularity of the time measurement */ -#define P3M_TIME_GRAN 2 - -/************************************************ - * data types - ************************************************/ - /** Structure to hold P3M parameters and some dependent variables. */ struct P3MParameters { /** tuning or production? */ @@ -242,7 +218,7 @@ struct P3MLocalMesh { double space_layer); }; -/** One of the aliasing sums used by \ref p3m_k_space_error. +/** One of the aliasing sums used to compute k-space errors. * Fortunately the one which is most important (because it converges * most slowly, since it is not damped exponentially) can be * calculated analytically. The result (which depends on the order of diff --git a/src/core/pressure.cpp b/src/core/pressure.cpp index 3566c484af9..16550671a91 100644 --- a/src/core/pressure.cpp +++ b/src/core/pressure.cpp @@ -57,8 +57,9 @@ static std::shared_ptr calculate_pressure_local() { auto obs_pressure_ptr = std::make_shared(9); - if (long_range_interactions_sanity_checks()) + if (long_range_interactions_sanity_checks()) { return obs_pressure_ptr; + } auto &obs_pressure = *obs_pressure_ptr; diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp index bb35da7f157..14e0b3e7643 100644 --- a/src/script_interface/electrostatics/CoulombP3M.hpp +++ b/src/script_interface/electrostatics/CoulombP3M.hpp @@ -27,7 +27,6 @@ #include "Actor.hpp" #include "core/electrostatics/p3m.hpp" -#include "core/tuning.hpp" #include "script_interface/get_value.hpp" diff --git a/src/script_interface/tests/Actors_test.cpp b/src/script_interface/tests/Actors_test.cpp index 604802c9c20..9a4bd19e1c3 100644 --- a/src/script_interface/tests/Actors_test.cpp +++ b/src/script_interface/tests/Actors_test.cpp @@ -33,6 +33,7 @@ #include "script_interface/magnetostatics/Actor_impl.hpp" #include "script_interface/scafacos/scafacos.hpp" +#include "core/actor/visitors.hpp" #include "core/electrostatics/coulomb.hpp" #include "core/electrostatics/debye_hueckel.hpp" #include "core/electrostatics/registration.hpp" @@ -96,6 +97,19 @@ BOOST_AUTO_TEST_CASE(coulomb_actor) { // check const and non-const access BOOST_CHECK_CLOSE(actor.actor()->prefactor, 2., tol); BOOST_CHECK_CLOSE(Utils::as_const(actor).actor()->prefactor, 2., tol); + // check visitors + BOOST_CHECK(has_actor_of_type<::DebyeHueckel>( + boost::optional(actor.actor()))); + BOOST_CHECK(not has_actor_of_type( + boost::optional(actor.actor()))); + BOOST_CHECK(is_already_stored( + actor.actor(), boost::optional(actor.actor()))); + BOOST_CHECK(not is_already_stored( + std::shared_ptr<::DebyeHueckel>{}, + boost::optional(actor.actor()))); + BOOST_CHECK(not is_already_stored( + std::shared_ptr{}, + boost::optional(actor.actor()))); } #endif // ELECTROSTATICS diff --git a/testsuite/python/coulomb_interface.py b/testsuite/python/coulomb_interface.py index 2cb7ee14f77..e305664475d 100644 --- a/testsuite/python/coulomb_interface.py +++ b/testsuite/python/coulomb_interface.py @@ -215,6 +215,12 @@ def test_elc_p3m_exceptions(self): self.system.actors.add(elc) with self.assertRaisesRegex(Exception, r"ELC: requires periodicity \(1 1 1\)"): self.system.periodicity = [False, False, False] + with self.assertRaisesRegex(Exception, r"requires periodicity \(1 1 1\)"): + self.system.integrator.run(0, recalc_forces=True) + with self.assertRaisesRegex(Exception, r"requires periodicity \(1 1 1\)"): + self.system.analysis.energy() + with self.assertRaisesRegex(Exception, r"requires periodicity \(1 1 1\)"): + self.system.analysis.pressure() self.system.periodicity = [True, True, True] n_nodes = self.system.cell_system.get_state()["n_nodes"] if n_nodes > 1: