Skip to content

Commit

Permalink
Merge pull request #26 from doyle-lab-ucla/dev2
Browse files Browse the repository at this point in the history
  • Loading branch information
beef-broccoli authored Oct 13, 2023
2 parents 96ee06a + ea84090 commit 801aba2
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 38 deletions.
1 change: 1 addition & 0 deletions autoqchem/helper_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class slurm_status(enum.IntEnum):
failed = 4 #: job failed
incomplete = 5 #: job is incomplete, it should be resubmitted
uploaded = 6 #: job has been uploaded to the DB successfully
inspect = 7 #: job needs to be inspected due to problematic labeling


@enum.unique
Expand Down
6 changes: 4 additions & 2 deletions autoqchem/rdkit_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def rdmol_from_slurm_jobs(jobs, postDFT=True) -> Chem.Mol:
elements, connectivity_matrix, charges = jobs[0].elements, jobs[0].connectivity_matrix, jobs[0].charges
conformer_coordinates = []
energies = []
labels_ok = True
for j in jobs:
if postDFT:

Expand All @@ -193,7 +194,8 @@ def rdmol_from_slurm_jobs(jobs, postDFT=True) -> Chem.Mol:
le.get_atom_labels()

# verify that the labels are in the same order in gaussian after running it
assert tuple(le.labels) == tuple(elements)
if tuple(le.labels) != tuple(elements):
labels_ok = False

le.get_geometry()
conformer_coordinates.append(le.geom[list('XYZ')].values)
Expand All @@ -216,7 +218,7 @@ def rdmol_from_slurm_jobs(jobs, postDFT=True) -> Chem.Mol:
energies = [AllChem.MMFFGetMoleculeForceField(rdmol, props, confId=i).CalcEnergy()
for i in range(rdmol.GetNumConformers())]

return rdmol, energies
return rdmol, energies, labels_ok


def rdmol_from_sge_jobs(jobs, postDFT=True) -> Chem.Mol:
Expand Down
77 changes: 41 additions & 36 deletions autoqchem/slurm_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,33 +234,33 @@ def _retrieve_single_job(self, job) -> slurm_status:

# initialize the log extractor, it will try to read basic info from the file
le = gaussian_log_extractor(log_file.local)
if len(job.tasks) == le.n_tasks:
job.status = slurm_status.done
else:
try: # look for more specific exception
le.check_for_exceptions()

try: # look for more specific exception
le.check_for_exceptions()

except NoGeometryException:
job.status = slurm_status.failed
logger.warning(
f"Job {job.base_name} failed - the log file does not contain geometry. Cannot resubmit.")
except NoGeometryException:
job.status = slurm_status.failed
logger.warning(
f"Job {job.base_name} failed - the log file does not contain geometry. Cannot resubmit.")

except NegativeFrequencyException:
job.status = slurm_status.incomplete
logger.warning(
f"Job {job.base_name} incomplete - log file contains negative frequencies. Resubmit job.")
except NegativeFrequencyException:
job.status = slurm_status.incomplete
logger.warning(
f"Job {job.base_name} incomplete - log file contains negative frequencies. Resubmit job.")

except OptimizationIncompleteException:
job.status = slurm_status.incomplete
logger.warning(f"Job {job.base_name} incomplete - geometry optimization did not complete.")
except OptimizationIncompleteException:
job.status = slurm_status.incomplete
logger.warning(f"Job {job.base_name} incomplete - geometry optimization did not complete.")

except Exception as e:
job.status = slurm_status.failed
logger.warning(f"Job {job.base_name} failed with unhandled exception: {e}")
except Exception as e:
job.status = slurm_status.failed
logger.warning(f"Job {job.base_name} failed with unhandled exception: {e}")

else: # no exceptions were thrown, but still the job is incomplete
job.status = slurm_status.incomplete
logger.warning(f"Job {job.base_name} incomplete.")
if len(job.tasks) == le.n_tasks:
job.status = slurm_status.done
else: # no exceptions were thrown, but still the job is incomplete
job.status = slurm_status.incomplete
logger.warning(f"Job {job.base_name} incomplete.")

except FileNotFoundError:
job.status = slurm_status.failed
Expand Down Expand Up @@ -369,20 +369,25 @@ def upload_done_molecules_to_db(self, tags, RMSD_threshold=0.35) -> None:

for done_can in done_cans:
(keys, jobs) = zip(*self.get_jobs(can=done_can).items())
rdmol, energies = rdmol_from_slurm_jobs(jobs, postDFT=True)
keep = prune_rmsds(rdmol, RMSD_threshold)
logger.info(f"Molecule {done_can} has {len(keys) - len(keep)} / {len(keys)} duplicate conformers.")

# remove duplicate jobs
can_keys_to_remove = [key for i, key in enumerate(keys) if i not in keep]
to_remove_jobs = {name: job for name, job in self.jobs.items() if name in can_keys_to_remove}
logger.info(
f"Removing {len(keys) - len(keep)} / {len(keys)} jobs and log files that contain duplicate conformers.")
self.remove_jobs(to_remove_jobs)

# upload non-duplicate jobs
can_keys_to_keep = [key for i, key in enumerate(keys) if i in keep]
self._upload_can_to_db(can_keys_to_keep, tags)
rdmol, energies, labels_ok = rdmol_from_slurm_jobs(jobs, postDFT=True)
if labels_ok:
keep = prune_rmsds(rdmol, RMSD_threshold)
logger.info(f"Molecule {done_can} has {len(keys) - len(keep)} / {len(keys)} duplicate conformers.")

# remove duplicate jobs
can_keys_to_remove = [key for i, key in enumerate(keys) if i not in keep]
to_remove_jobs = {name: job for name, job in self.jobs.items() if name in can_keys_to_remove}
logger.info(
f"Removing {len(keys) - len(keep)} / {len(keys)} jobs and log files that contain duplicate conformers.")
self.remove_jobs(to_remove_jobs)

# upload non-duplicate jobs
can_keys_to_keep = [key for i, key in enumerate(keys) if i in keep]
self._upload_can_to_db(can_keys_to_keep, tags)
else:
for key in keys:
self.jobs[key].status = slurm_status.inspect
self._cache()

def _upload_can_to_db(self, keys, tags) -> None:
"""Uploading single molecule conformers to database.
Expand Down

0 comments on commit 801aba2

Please sign in to comment.