Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run minimize_energy! until gradient fully converges #333

Merged
merged 6 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/src/versions.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
(In development)

* Better error message when a $g$-tensor is symmetry disallowed.
* Higher-precision convergence in [`minimize_energy!`](@ref).
* Fix [`minimize_energy!`](@ref) when used with [`set_vacancy_at!`](@ref).

## v0.7.3
Expand Down
33 changes: 17 additions & 16 deletions src/Optimization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,16 @@ end

"""
minimize_energy!(sys::System{N}; maxiters=1000, method=Optim.ConjugateGradient(),
g_tol=1e-10, kwargs...) where N
kwargs...) where N

Optimizes the spin configuration in `sys` to minimize energy. A total of
`maxiters` iterations will be attempted. Convergence is reached when the root
mean squared energy gradient goes below `g_tol`. The remaining `kwargs` will be
forwarded to the `optimize` method of the Optim.jl package.
`maxiters` iterations will be attempted. The `method` parameter will be used in
the `optimize` function of the [Optim.jl
package](https://github.com/JuliaNLSolvers/Optim.jl). Any remaining `kwargs`
will be included in the `Options` constructor of Optim.jl.
"""
function minimize_energy!(sys::System{N}; maxiters=1000, subiters=10, method=Optim.ConjugateGradient(),
g_tol=1e-10, kwargs...) where N
kwargs...) where N
# Allocate buffers for optimization:
# - Each `ns[site]` defines a direction for stereographic projection.
# - Each `αs[:,site]` will be optimized in the space orthogonal to `ns[site]`.
Expand All @@ -123,18 +124,19 @@ function minimize_energy!(sys::System{N}; maxiters=1000, subiters=10, method=Opt
optim_set_gradient!(G, sys, αs, ns)
end

# Monitor energy gradient magnitude relative to absolute tolerance `g_tol`.
g_res = Inf

# Repeatedly optimize using a small number (`subiters`) of steps.
options = Optim.Options(; iterations=subiters, g_tol, kwargs...)
# Repeatedly optimize using a small number (`subiters`) of steps. See
# https://github.com/JuliaNLSolvers/Optim.jl/issues/1120 for discussion of
# settings required to find the minimizer `x` to high-accuracy. Note that
# `x` contains normalized spin variables, while gradient `g` has dimensions
# of energy, so we elect to set `x_tol` as the dimensionless convergence
# tolerance.
options = Optim.Options(; iterations=subiters, x_tol=1e-12, g_tol=0, f_reltol=NaN, f_abstol=NaN, kwargs...)
local output
for iter in 1 : div(maxiters, subiters, RoundUp)
output = Optim.optimize(f, g!, αs, method, options)
g_res = Optim.g_residual(output)
@assert g_tol == Optim.g_tol(output)

# Exit if converged
if g_res ≤ g_tol
if Optim.converged(output)
cnt = (iter-1)*subiters + output.iterations
return cnt
end
Expand All @@ -144,8 +146,7 @@ function minimize_energy!(sys::System{N}; maxiters=1000, subiters=10, method=Opt
αs .*= 0
end

# res_str = number_to_simple_string(g_res; digits=2)
# tol_str = number_to_simple_string(g_tol; digits=2)
# @warn "Optimization failed to converge within $maxiters iterations ($res_str ≰ $tol_str)"
f_abschange, x_abschange, g_residual = number_to_simple_string.((Optim.f_abschange(output), Optim.x_abschange(output), Optim.g_residual(output)); digits=2)
@warn "Non-converged after $maxiters iterations: |ΔE|=$f_abschange, |Δx|=$x_abschange, |∂E/∂x|=$g_residual"
return -1
end
7 changes: 4 additions & 3 deletions src/Spiral/SpiralEnergy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ function minimize_spiral_energy!(sys, axis; maxiters=10_000, k_guess=randn(sys.r
check_rotational_symmetry(sys; axis, θ=0.01)

L = natoms(sys.crystal)

params = fill(zero(Vec3), L+1)
for i in 1:L
params[i] = inverse_stereographic_projection(normalize(sys.dipoles[i]), axis)
Expand All @@ -218,8 +218,9 @@ function minimize_spiral_energy!(sys, axis; maxiters=10_000, k_guess=randn(sys.r
f(params) = spiral_f(sys, axis, params, λ)
g!(G, params) = spiral_g!(G, sys, axis, params, λ)

# Minimize f, the energy of a spiral
options = Optim.Options(; iterations=maxiters)
# Minimize f, the energy of a spiral. See comment in `minimize_energy!` for
# a discussion of the tolerance settings.
options = Optim.Options(; iterations=maxiters, x_tol=1e-12, g_tol=0, f_reltol=NaN, f_abstol=NaN)

# LBFGS does not converge to high precision, but ConjugateGradient can fail
# to converge: https://github.com/JuliaNLSolvers/LineSearches.jl/issues/175.
Expand Down
44 changes: 22 additions & 22 deletions test/test_lswt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
# minimize_energy!(sys; maxiters=1000)
# println(round(reinterpret(reshape, ComplexF64, sys.coherents); digits=3))
ground_state = ComplexF64[0.452 + 0.069im; 0.586 + 0.166im; 0.31 + 0.351im; 0.035 + 0.39im; -0.136 + 0.145im; -0.03 - 0.076im;;;;; -0.026 - 0.064im; 0.247 - 0.209im; -0.444 + 0.307im; 0.501 - 0.122im; -0.478 - 0.012im; 0.32 - 0.046im;;;;; -0.078 - 0.451im; -0.029 - 0.609im; 0.234 - 0.406im; 0.358 - 0.158im; 0.181 + 0.083im; -0.063 + 0.053im;;;;; -0.061 + 0.033im; -0.235 - 0.222im; 0.354 + 0.407im; -0.177 - 0.485im; 0.042 + 0.476im; -0.082 - 0.312im;;;;; -0.782 + 0.332im; 0.184 - 0.453im; 0.058 + 0.138im; 0.121 + 0.049im; -0.007 + 0.003im; -0.006 + 0.015im;;;;; 0.057 - 0.005im; -0.023 - 0.013im; -0.101 + 0.009im; -0.247 - 0.257im; -0.631 - 0.193im; -0.617 + 0.209im;;;;; -0.695 - 0.489im; 0.482 - 0.083im; -0.087 + 0.122im; 0.022 + 0.129im; -0.006 - 0.004im; -0.016 + 0.003im;;;;; 0.04 + 0.041im; -0.004 - 0.026im; -0.07 - 0.072im; 0.044 - 0.354im; -0.248 - 0.611im; -0.551 - 0.347im;;;;; 0.347 + 0.149im; -0.216 + 0.511im; -0.386 - 0.36im; 0.434 - 0.153im; -0.024 + 0.24im; 0.026 - 0.021im;;;;; 0.061 + 0.082im; 0.096 + 0.219im; 0.361 + 0.203im; 0.487 - 0.18im; 0.228 - 0.554im; -0.282 - 0.229im;;;;; -0.205 + 0.317im; -0.468 - 0.298im; 0.419 - 0.32im; 0.078 + 0.454im; -0.233 - 0.064im; 0.016 + 0.029im;;;;; 0.101 + 0.018im; 0.22 + 0.095im; 0.402 - 0.097im; 0.233 - 0.464im; -0.211 - 0.561im; -0.363 + 0.025im;;;;; 0.059 - 0.582im; 0.442 - 0.355im; 0.51 + 0.063im; 0.158 + 0.213im; -0.009 + 0.024im; 0.03 - 0.026im;;;;; 0.582 - 0.058im; 0.436 + 0.363im; 0.04 + 0.512im; -0.177 + 0.197im; -0.025 - 0.004im; 0.031 + 0.024im;;;;; 0.561 + 0.165im; 0.268 + 0.5im; -0.156 + 0.49im; -0.238 + 0.116im; -0.022 - 0.013im; 0.02 + 0.034im;;;;; -0.337 + 0.478im; -0.56 + 0.092im; -0.413 - 0.306im; -0.033 - 0.263im; 0.019 - 0.017im; -0.039 + 0.008im]

sys.coherents .= reinterpret(reshape, Sunny.CVec{6}, ground_state)
minimize_energy!(sys)
@assert energy_per_site(sys) ≈ -328.38255
Expand Down Expand Up @@ -65,7 +65,7 @@
data_flat = reinterpret(ComplexF64, res.data[1:5])
# println(round.(data_flat; digits=12))
data_golden = natoms * ComplexF64[0.000768755803 + 0.0im, 0.000453313199 - 4.8935387e-5im, 0.000468535469 + 8.5812793e-5im, 0.000453313199 + 4.8935387e-5im, 0.00027042076 + 0.0im, 0.000270819458 + 8.0426107e-5im, 0.000468535469 - 8.5812793e-5im, 0.000270819458 - 8.0426107e-5im, 0.000295138353 + 0.0im, 0.0 + 0.0im, 0.0 - 0.0im, 0.0 + 0.0im, 0.0 + 0.0im, 0.0 + 0.0im, 0.0 + 0.0im, 0.0 - 0.0im, 0.0 - 0.0im, 0.0 + 0.0im, 0.00021794048 + 0.0im, -0.000114399503 - 0.000211293782im, -0.000126018935 + 0.000199895684im, -0.000114399503 + 0.000211293782im, 0.000264899429 + 0.0im, -0.000127650501 - 0.000227103219im, -0.000126018935 - 0.000199895684im, -0.000127650501 + 0.000227103219im, 0.000256212415 + 0.0im, 0.0 + 0.0im, -0.0 - 0.0im, -0.0 + 0.0im, -0.0 + 0.0im, 0.0 + 0.0im, -0.0 - 0.0im, -0.0 - 0.0im, -0.0 + 0.0im, 0.0 + 0.0im, 7.5017149e-5 + 0.0im, 0.000206625046 + 0.000158601356im, 0.000240055385 - 0.00011016714im, 0.000206625046 - 0.000158601356im, 0.000904437194 + 0.0im, 0.000428286033 - 0.000810966567im, 0.000240055385 + 0.00011016714im, 0.000428286033 + 0.000810966567im, 0.000929965845 + 0.0im]

@test isapprox(data_flat, data_golden; atol=1e-9)
end

Expand Down Expand Up @@ -160,7 +160,7 @@ end
latvecs = lattice_vectors(a, a, a, 90, 90, 90)
positions = [[0, 0, 0]]
cryst = Crystal(latvecs, positions)

function test_biquad(mode, q, s)
# System
sys = System(cryst, [1 => Moment(; s, g=2)], mode; dims=(2, 2, 2))
Expand All @@ -181,7 +181,7 @@ end
# Analytical result
γq = 2 * (cos(2π*q[1]) + cos(2π*q[2]) + cos(2π*q[3]))
disp_ref = J * (s*cos(α) - (2*s-2+1/s) * sin(α)) * √(36 - γq^2)

@test disp[end-1] ≈ disp[end] ≈ disp_ref
end

Expand All @@ -197,16 +197,16 @@ end

latvecs = lattice_vectors(1, 1, 1, 90, 90, 90)
cryst = Crystal(latvecs, [[0,0,0], [0.4,0,0]]; types=["A", "B"])

sys = System(cryst, [1 => Moment(s=1, g=2), 2 => Moment(s=2, g=2)], :dipole)
set_pair_coupling!(sys, (S1, S2) -> +(S1'*diagm([2,-1,-1])*S1)*(S2'*diagm([2,-1,-1])*S2), Bond(1, 2, [0,0,0]))

θ = randn()
set_dipole!(sys, [1, 0, 0], (1, 1, 1, 1))
set_dipole!(sys, [0, cos(θ), sin(θ)], (1, 1, 1, 2))
energy(sys)
@test energy(sys) ≈ -3

swt = SpinWaveTheory(sys; measure=ssf_trace(sys; apply_g=false))
res = intensities_bands(swt, [[0,0,0]])
@test res.disp[1] ≈ 9
Expand All @@ -223,7 +223,7 @@ end
positions = [[0, 0, 0]]
cryst = Crystal(latvecs, positions)
q = [0.12, 0.23, 0.34]

sys = System(cryst, [1 => Moment(; s, g=-1)], :dipole)
sys = reshape_supercell(sys, [1 -1 0; 1 1 0; 0 0 1])
set_exchange!(sys, J, Bond(1, 1, [1, 0, 0]))
Expand Down Expand Up @@ -304,7 +304,7 @@ end

polarize_spins!(sys, (0,0,1))
@test energy_per_site(sys) ≈ -0.1913132980155851

swt = SpinWaveTheory(sys; measure=ssf_perp(sys))
qs = [[0, 0, 0], [0, 0, 1/2], [0, 1/2, 1/2], [0, 0, 0]]
res = intensities_bands(swt, qs)
Expand All @@ -318,19 +318,19 @@ end
sys = System(cryst, [1 => Moment(s=1, g=2)], :dipole, seed=2)
enable_dipole_dipole!(sys, Units(:meV, :angstrom).vacuum_permeability)
polarize_spins!(sys, (1,2,3)) # arbitrary direction

R = hcat([1,1,-1], [-1,1,1], [1,-1,1]) / 2
sys_reshape = reshape_supercell(sys, R)
@test energy_per_site(sys_reshape) ≈ energy_per_site(sys) ≈ -0.89944235377

swt1 = SpinWaveTheory(sys; measure=nothing)
swt2 = SpinWaveTheory(sys_reshape; measure=nothing)
q = [0.5, -0.1, 0.3]
disp1 = dispersion(swt1, [q])
disp2 = dispersion(swt2, [q])

@test disp1 ≈ [1.3236778213351734, 0.9206655419611791]
@test disp2 ≈ [0.9206655419611772]
@test disp2 ≈ [0.9206655419611772]
end
end

Expand Down Expand Up @@ -359,7 +359,7 @@ end
swt = SpinWaveTheory(sys; measure)
q = [0.41568,0.56382,0.76414]
res = intensities_bands(swt, [q])

SpinW_energies = [2.6267,2.6541,2.8177,2.8767,3.2458,3.3172,3.4727,3.7767,3.8202,3.8284,3.8749,3.9095,3.9422,3.9730,4.0113,4.0794,4.2785,4.4605,4.6736,4.7564,4.7865]
SpinW_intensities = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2999830079, -0.2999830079im, 0,0.2999830079im, 0.2999830079, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.3591387785, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5954018134, -0.5954018134im, 0,0.5954018134im, 0.5954018134, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.3708506016,1.3708506016im, 0, -1.3708506016im, 1.3708506016, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0511743697, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0734875342, 0.0 + 0.0734875342im, 0, 0.0 - 0.0734875342im, 0.0734875342, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0577275935, -0.0577275935im, 0,0.0577275935im, 0.0577275935, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.1733740706, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0338873034,0.0338873034im, 0, -0.0338873034im, 0.0338873034, 0, 0, 0, 0]

Expand All @@ -384,7 +384,7 @@ end

h = 2.0*J₃
set_field!(sys, [0, 0, h])

dipole_array = zeros(Float64,3,7,7)
dipole_array[:,:,1]= [0.856769 0.811926 -0.485645 -0.950288 -0.429787 -0.0800935 0.217557;
0.35333 -0.2906 0.0685251 -0.297433 -0.854291 -0.004611 0.97169;
Expand All @@ -410,7 +410,7 @@ end
for x in 1:7, y in 1:7
set_dipole!(sys, dipole_array[:,x,y], (x,y,1,1))
end

q1 = [0.0391,0.0415,0.5530]
q2 = [0.2360,0.7492,0.9596]
q3 = [0.1131,0.7654,0.2810]
Expand All @@ -428,7 +428,7 @@ end
end


@testitem "Invariance to reshaping" begin
@testitem "Invariance to reshaping" begin
# Diamond-cubic with antiferromagnetic exchange
latvecs = lattice_vectors(1, 1, 1, 90, 90, 90)
cryst = Crystal(latvecs, [[0,0,0]], 227; choice="1")
Expand All @@ -438,12 +438,12 @@ end
randomize_spins!(sys)
minimize_energy!(sys)
@test energy_per_site(sys) ≈ -2s^2

# Reshaped system
shape = [0 1 1; 1 0 1; 1 1 0] / 2
sys_prim = reshape_supercell(sys, shape)
@test energy_per_site(sys_prim) ≈ -2s^2

# Both systems should produce the same intensities
formfactors = [1 => FormFactor("Co2")]
swt1 = SpinWaveTheory(sys_prim; measure=ssf_perp(sys_prim; formfactors))
Expand Down Expand Up @@ -594,7 +594,7 @@ end
using LinearAlgebra

function dimer_model(; J=1.0, J′=0.0, h=0.0, dims=(2,1,1), fast=false)
cryst = Crystal(I(3), [[0,0,0]], 1)
cryst = Crystal(I(3), [[0,0,0]], 1)
sys = System(cryst, [1 => Moment(s=3/2, g=1)], :SUN; dims)

S = spin_matrices(1/2)
Expand All @@ -605,13 +605,13 @@ end
S1i, S1j = Sunny.to_product_space(S1, S1)
S2i, S2j = Sunny.to_product_space(S2, S2)
scalar, bilin, biquad, tensordec = Sunny.decompose_general_coupling(J′*(S1i' * S1j + S2i' * S2j), 4, 4; extract_parts=fast)
Sunny.set_pair_coupling_aux!(sys, scalar, Sunny.Mat3(I)*bilin, biquad, tensordec, Bond(1, 1, [-1,0,0]))
Sunny.set_pair_coupling_aux!(sys, scalar, Sunny.Mat3(I)*bilin, biquad, tensordec, Bond(1, 1, [-1,0,0]))

return sys, cryst
end

# Set up dimer model and observables (0 and π channels manually specified)
sys, cryst = dimer_model(; J=1.0, J′=0.2, h=0.0, dims=(2,1,1), fast=false)
sys, cryst = dimer_model(; J=1.0, J′=0.2, h=0.0, dims=(2,1,1), fast=false)
s = spin_matrices(1/2)
S1, S2 = Sunny.to_product_space(s, s)
observables0 = Hermitian.([
Expand Down
Loading