Skip to content

Commit

Permalink
parallelization option for create_ML_training_data: untested
Browse files (browse the repository at this point in the history)
  • Loading branch information
RagnarB83 committed Aug 6, 2024
1 parent 61320fa commit 37f1324
Showing 1 changed file with 110 additions and 43 deletions.
153 changes: 110 additions & 43 deletions ash/modules/module_machine_learning.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,7 @@

# Function to create ML training data given XYZ-files and 2 ASH theories
def create_ML_training_data(xyzdir=None, xyz_trajectory=None, num_snapshots=100, random_snapshots=True,
theory_1=None, theory_2=None, charge=0, mult=1, Grad=True):
theory_1=None, theory_2=None, charge=0, mult=1, Grad=True, runmode="serial", numcores=1):
if xyzdir is None and xyz_trajectory is None:
print("Error: create_ML_training_data requires xyzdir or xyz_trajectory option to be set!")
ashexit()
Expand Down Expand Up @@ -96,49 +96,116 @@ def create_ML_training_data(xyzdir=None, xyz_trajectory=None, num_snapshots=100,
os.remove(f)
except:
pass
energies_file=open("train_data.energies", "w")
gradients_file=open("train_data.gradients", "w")

# LOOP
print("Starting loop over XYZ-files")
for file in list_of_xyz_files:
print("Now running file:", file)
basefile=os.path.basename(file)
label=basefile.split(".")[0]


frag = Fragment(xyzfile=file, charge=charge, mult=mult)

# 1: gas 2:solv or 1: LL or 2: HL
print("Now running Theory 1")
result_1 = Singlepoint(theory=theory_1, fragment=frag, Grad=Grad)

if delta is True:
# Running theory 2
print("Now running Theory 2")
result_2 = Singlepoint(theory=theory_2, fragment=frag, Grad=Grad)
# Delta energy
energy = result_2.energy - result_1.energy
if Grad is True:
gradient = result_2.gradient - result_1.gradient
else:
energy = result_1.energy
if runmode=="serial":
energies_file=open("train_data.energies", "w")
if Grad:
gradients_file=open("train_data.gradients", "w")
print("Runmode is serial!")
print("Will now loop over XYZ-files")
print("For a large dataset consider using parallel runmode")
for file in list_of_xyz_files:
print("Now running file:", file)
basefile=os.path.basename(file)
label=basefile.split(".")[0]
frag = Fragment(xyzfile=file, charge=charge, mult=mult)

# 1: gas 2:solv or 1: LL or 2: HL
print("Now running Theory 1")
result_1 = Singlepoint(theory=theory_1, fragment=frag, Grad=Grad)

if delta is True:
# Running theory 2
print("Now running Theory 2")
result_2 = Singlepoint(theory=theory_2, fragment=frag, Grad=Grad)
# Delta energy
energy = result_2.energy - result_1.energy
if Grad is True:
gradient = result_2.gradient - result_1.gradient
else:
energy = result_1.energy
if Grad is True:
gradient = result_1.gradient

# Create files for ML

energies_file.write(f"{energy}\n")
# Gradients-file
if Grad is True:
gradient = result_1.gradient

# Create files for ML

energies_file.write(f"{energy}\n")
# Gradients-file
gradients_file.write(f"{frag.numatoms}\n")
gradients_file.write(f"gradient {label} \n")
for g in gradient:
gradients_file.write(f"{g[0]:10.7f} {g[1]:10.7f} {g[2]:10.7f}\n")

# MultiXYZ-file
write_xyzfile(frag.elems, frag.coords, "train_data", printlevel=2, writemode='a', title=f"coords {label}")

energies_file.close()
gradients_file.close()

gradients_file.write(f"{frag.numatoms}\n")
gradients_file.write(f"gradient {label} \n")
for g in gradient:
gradients_file.write(f"{g[0]:10.7f} {g[1]:10.7f} {g[2]:10.7f}\n")

# MultiXYZ-file
write_xyzfile(frag.elems, frag.coords, "train_data", printlevel=2, writemode='a', title=f"coords {label}")

energies_file.close()
if Grad:
gradients_file.close()
elif runmode=="parallel":
print("Runmode is parallel!")
print("Will now run parallel calculations")

#Fragments
print("Looping over fragments first")
all_fragments=[]
labels=[]
for file in list_of_xyz_files:
print("Now running file:", file)
basefile=os.path.basename(file)
label=basefile.split(".")[0]
labels.append(label)
# Creating fragment with label
frag = Fragment(xyzfile=file, charge=charge, mult=mult, label=label)
all_fragments.append(frag)
write_xyzfile(frag.elems, frag.coords, "train_data", printlevel=2, writemode='a', title=f"coords {label}")

# Parallel run
print("Making sure numcores is set to 1 for both theories")
theory_1.set_numcores(1)


from ash.functions.functions_parallel import Job_parallel
print("Now starting in parallel mode Theory1 calculations")
results_theory1 = Job_parallel(fragments=all_fragments, theories=[theory_1], numcores=numcores, Grad=True)
print("results_theory1.energies_dict:", results_theory1.energies_dict)
if delta is True:
theory_2.set_numcores(1)
print("Now starting in parallel mode Theory2 calculations")
results_theory2 = Job_parallel(fragments=all_fragments, theories=[theory_2], numcores=numcores, Grad=True)
print("results_theory2.energies_dict:", results_theory2.energies_dict)

energies_file=open("train_data.energies", "w")
if Grad:
gradients_file=open("train_data.gradients", "w")

#Loop over energy dict:
for l in labels:
if delta is True:
energy = results_theory2.energies_dict[l] - results_theory1.energies_dict[l]
print("energy:", energy)

else:
energy = results_theory1.energies_dict[l]
# Create files for ML
energies_file.write(f"{energy}\n")
# Gradients-file
if Grad:
if delta is True:
gradient = results_theory2.gradients_dict[l] - results_theory1.gradients_dict[l]
else:
gradient = results_theory1.gradients_dict[l]

# Gradients-file
gradients_file.write(f"{frag.numatoms}\n")
gradients_file.write(f"gradient {label} \n")
for g in gradient:
gradients_file.write(f"{g[0]:10.7f} {g[1]:10.7f} {g[2]:10.7f}\n")

energies_file.close()
if Grad:
gradients_file.close()

print("All done! Files created:\ntrain_data.xyz\ntrain_data.energies\ntrain_data.gradients")

0 comments on commit 37f1324

Please sign in to comment.