Skip to content

Commit

Permalink
Enable force calculations on GPU runs (#1031)
Browse files Browse the repository at this point in the history
  • Loading branch information
abussy authored Dec 5, 2024
1 parent 95a1cb1 commit ed7ae28
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 13 deletions.
10 changes: 5 additions & 5 deletions src/terms/local.jl
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,12 @@ end
@timing "forces: local" function forces_local(S, basis::PlaneWaveBasis{T}, ρ, q) where {T}
model = basis.model
recip_lattice = model.recip_lattice
ρ_fourier = fft(basis, total_density(ρ))
ρ_fourier = to_cpu(fft(basis, total_density)))
real_ifSreal = S <: Real ? real : identity

# TODO: Right now, forces are not GPU compatible. Refer to compute_local_potential
# comments when working on this
Gqs_cart = [model.recip_lattice * (G + q) for G in G_vectors(basis)]
# TODO: currently, local forces entierly computed on the CPU.
# This might not be optimal. See compute_local_potential() too.
Gqs_cart = [model.recip_lattice * (G + q) for G in to_cpu(G_vectors(basis))]
form_factors = atomic_local_form_factors(basis, Gqs_cart)

# energy = sum of form_factor(G) * struct_factor(G) * rho(G)
Expand All @@ -157,7 +157,7 @@ end
* cis2pi(-dot(G + q, r))
* (-2T(π)) * (G + q) * im
/ sqrt(model.unit_cell_volume)
for (iG, G) in enumerate(G_vectors(basis))))
for (iG, G) in enumerate(to_cpu(G_vectors(basis)))))
end
end
forces
Expand Down
9 changes: 6 additions & 3 deletions src/terms/nonlocal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,12 @@ end
C = build_projection_coefficients(T, element.psp)
for (ik, kpt) in enumerate(basis.kpoints)
# We compute the forces from the irreductible BZ; they are symmetrized later.
G_plus_k = Gplusk_vectors(basis, kpt)
# TODO: currently, nonlocal forces entierly computed on the CPU.
# This might not be optimal.
G_plus_k_cart = to_cpu(Gplusk_vectors_cart(basis, kpt))
G_plus_k = to_cpu(Gplusk_vectors(basis, kpt))
ψk = to_cpu(ψ[ik])
occupationk = to_cpu(occupation[ik])
form_factors = build_projector_form_factors(element.psp, G_plus_k_cart)
for idx in group
r = model.positions[idx]
Expand All @@ -78,9 +82,8 @@ end

forces[idx] += map(1:3) do α
dPdR = [-2T(π)*im*p[α] for p in G_plus_k] .* P
ψk = ψ[ik]
δHψk = P * (C * (dPdR' * ψk))
-sum(occupation[ik][iband] * basis.kweights[ik] *
-sum(occupationk[iband] * basis.kweights[ik] *
2real(dot(ψk[:, iband], δHψk[:, iband]))
for iband=1:size(ψk, 2))
end # α
Expand Down
9 changes: 5 additions & 4 deletions src/terms/xc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -163,11 +163,12 @@ end
isnothing(term.ρcore) && return nothing

Vxc_real = xc_potential_real(term, basis, ψ, occupation; ρ, τ).potential
# TODO: the factor of 2 here should be associated with the density, not the potential
# TODO: move arrays to the CPU to enable forces calculations on the GPU.
# Might require optimizations in the future.
if basis.model.spin_polarization in (:none, :spinless)
Vxc_fourier = fft(basis, Vxc_real[:,:,:,1])
Vxc_fourier = to_cpu(fft(basis, Vxc_real[:,:,:,1]))
else
Vxc_fourier = fft(basis, mean(Vxc_real, dims=4))
Vxc_fourier = to_cpu(fft(basis, mean(Vxc_real, dims=4)))
end

model = basis.model
Expand All @@ -192,7 +193,7 @@ function _force_xc(basis::PlaneWaveBasis{T}, Vxc_fourier::AbstractArray{U}, form
igroup, r) where {T, U}
TT = promote_type(T, real(U))
f = zero(Vec3{TT})
for (iG, (G, G_cart)) in enumerate(zip(G_vectors(basis), G_vectors_cart(basis)))
for (iG, (G, G_cart)) in enumerate(zip(to_cpu(G_vectors(basis)), to_cpu(G_vectors_cart(basis))))
f -= real(conj(Vxc_fourier[iG])
.* form_factors[(igroup, norm(G_cart))]
.* cis2pi(-dot(G, r))
Expand Down
31 changes: 30 additions & 1 deletion test/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
scfres_gpu = run_problem(; architecture=DFTK.GPU(CuArray))
@test abs(scfres_cpu.energies.total - scfres_gpu.energies.total) < 1e-9
@test norm(scfres_cpu.ρ - Array(scfres_gpu.ρ)) < 1e-9
# Test that forces compute: symmetric structure, forces are zero
@test norm(compute_forces(scfres_cpu) - compute_forces(scfres_gpu)) < 1e-10
end

@testitem "CUDA iron functionality test" tags=[:gpu] setup=[TestCases] begin
Expand All @@ -43,6 +45,34 @@ end
scfres_gpu = run_problem(; architecture=DFTK.GPU(CuArray))
@test abs(scfres_cpu.energies.total - scfres_gpu.energies.total) < 1e-7
@test norm(scfres_cpu.ρ - Array(scfres_gpu.ρ)) < 1e-6
# Test that forces compute: symmetric structure, forces are zero
@test norm(compute_forces(scfres_cpu) - compute_forces(scfres_gpu)) < 1e-9
end

@testitem "CUDA aluminium forces test" tags=[:gpu] setup=[TestCases] begin
using DFTK
using CUDA
using LinearAlgebra
aluminium = TestCases.aluminium
positions = aluminium.positions
# Perturb equilibrium position for non-zero forces
positions[1] += [0.01, 0.0, -0.01]

function run_problem(; architecture)
# Test with a core-corrected PSP for maximal coverage
Al = ElementPsp(aluminium.atnum, :Al, aluminium.mass, load_psp(aluminium.psp_upf))
atoms = fill(Al, length(aluminium.atoms))
model = model_DFT(aluminium.lattice, atoms, positions;
functionals=PBE(), temperature=0.01)
basis = PlaneWaveBasis(model; Ecut=32, kgrid=(1, 1, 1), architecture)
self_consistent_field(basis; tol=1e-10, mixing=SimpleMixing())
end

scfres_cpu = run_problem(; architecture=DFTK.CPU())
scfres_gpu = run_problem(; architecture=DFTK.GPU(CuArray))
@test abs(scfres_cpu.energies.total - scfres_gpu.energies.total) < 1e-10
@test norm(scfres_cpu.ρ - Array(scfres_gpu.ρ)) < 1e-8
@test norm(compute_forces(scfres_cpu) - compute_forces(scfres_gpu)) < 1e-7
end


Expand All @@ -52,4 +82,3 @@ end
# TODO meta GGA
# TODO Aluminium with LdosMixing
# TODO Anderson acceleration
# TODO Norm-conserving pseudopotentials with non-linear core

0 comments on commit ed7ae28

Please sign in to comment.