Skip to content

Commit

Permalink
fix(fix #508): generate summary files when all genomes are classified with ANI…
Browse files Browse the repository at this point in the history
… for a domain

Add an extra step that adds all genomes classified with ANI to the summary
files. This was not taking place when all genomes for a specific domain
were either filtered out or classified with ANI and the MSA for the
align step was empty.
  • Loading branch information
pchaumeil committed Apr 27, 2023
1 parent 4169076 commit 5b81742
Showing 1 changed file with 19 additions and 10 deletions.
29 changes: 19 additions & 10 deletions gtdbtk/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ def run(self,
# add filtered genomes to the summary file
warning_counter = self.add_filtered_genomes_to_summary(align_dir, warning_counter, summary_file,
marker_set_id, prefix)

# But if there is Unclassified genomes without domain,
# they still have to be written in the bac120 summary file:
elif marker_set_id == 'bac120':
Expand All @@ -471,6 +472,13 @@ def run(self,
# add filtered genomes to the summary file
warning_counter = self.add_filtered_genomes_to_summary(align_dir, warning_counter, summary_file,
marker_set_id, prefix)

# we add all genomes classified with ANI
if mash_classified_user_genomes and marker_set_id in mash_classified_user_genomes:
list_summary_rows = mash_classified_user_genomes.get(marker_set_id)
for row in list_summary_rows:
summary_file.add_row(row)

if summary_file.has_row():
summary_file.write()
output_files.setdefault(marker_set_id, []).append(summary_file.path)
Expand Down Expand Up @@ -1381,16 +1389,17 @@ def add_filtered_genomes_to_summary(self, align_dir,warning_counter, summary_fil
else:
filtered_file = os.path.join(align_dir,PATH_AR53_FILTERED_GENOMES.format(prefix=prefix))
domain = 'Archaea'

with open(filtered_file) as fin:
for line in fin:
infos = line.strip().split('\t')
summary_row = ClassifySummaryFileRow()
summary_row.gid = infos[0]
summary_row.classification = f'Unclassified {domain}'
summary_row.warnings = infos[1]
summary_file.add_row(summary_row)
warning_counter += 1
# if file exists:
if os.path.exists(filtered_file):
with open(filtered_file) as fin:
for line in fin:
infos = line.strip().split('\t')
summary_row = ClassifySummaryFileRow()
summary_row.gid = infos[0]
summary_row.classification = f'Unclassified {domain}'
summary_row.warnings = infos[1]
summary_file.add_row(summary_row)
warning_counter += 1
return warning_counter

def add_failed_genomes_to_summary(self, align_dir, summary_file, prefix):
Expand Down

0 comments on commit 5b81742

Please sign in to comment.