diff --git a/SConstruct b/SConstruct index f7fddb2..4dc4832 100644 --- a/SConstruct +++ b/SConstruct @@ -502,9 +502,10 @@ def add_cluster_analysis(w): where N is the ranking of vlads untrimmed deduplication, and M is the multiplicity of said deduplication.""" # This option controls which sequences get joined on in the merge for the partis_seqmeta file, which has # orig/new names, joined on sequence from the other file - sources = {'--partis-seqmeta': c['partis_seqmeta'], + sources = {'--partis-seqmeta': c['partis_seqmeta'], '--cluster-mapping': c['cluster_mapping'] if c['reconstruction']['prune_strategy'] == 'min_adcl' else None, - } + '--pruned-ids': c['pruned_ids'] if c['reconstruction']['prune_strategy'] == 'seed_lineage' else None, + } sources = {k: v for k, v in sources.items() if v} base_call = 'aggregate_minadcl_cluster_multiplicities.py ' for i, (k, v) in enumerate(sources.items()): diff --git a/bin/aggregate_minadcl_cluster_multiplicities.py b/bin/aggregate_minadcl_cluster_multiplicities.py index 7b1ff06..3865b45 100755 --- a/bin/aggregate_minadcl_cluster_multiplicities.py +++ b/bin/aggregate_minadcl_cluster_multiplicities.py @@ -5,7 +5,9 @@ import collections #import itertools - +def filter_by_ids(results, ids): + filtered_results = [result for result in results if result['unique_id'] in ids] + return filtered_results def aggregate_clusters(merge_results, cluster_mapping): merge_results = {row['sequence']: row for row in merge_results} @@ -69,11 +71,18 @@ def seqmeta_reader(filename): 'duplicates': d['duplicates'].split(':')}) for d in data] +def pruned_ids_reader(filename): + data = set() + with open(filename) as f: + for line in f: + data.add(line.rstrip()) + return data def get_args(): parser = argparse.ArgumentParser() parser.add_argument('--cluster-mapping', type=cluster_reader) parser.add_argument('--partis-seqmeta', type=seqmeta_reader) + parser.add_argument('--pruned-ids', type=pruned_ids_reader) parser.add_argument('output', type=argparse.FileType('w')) args = parser.parse_args() return args @@ -91,6 +100,8 @@ def main(): results = args.partis_seqmeta if args.cluster_mapping: results = aggregate_clusters(results, args.cluster_mapping) + if args.pruned_ids: + results = filter_by_ids(results, args.pruned_ids) out_writer.writerows(format_results(results)) args.output.close() diff --git a/site_scons/software_versions.py b/site_scons/software_versions.py index 4c2dc5c..96071e0 100644 --- a/site_scons/software_versions.py +++ b/site_scons/software_versions.py @@ -25,9 +25,9 @@ def tripl_version(): 'muscle': 'muscle -version', 'seqmagick': 'seqmagick --version', 'FastTree': None, - 'prank': 'prank -v', +# 'prank': 'prank -v', 'tripl': tripl_version, - #'nestly': lambda: nestly.__version__, +# 'nestly': lambda: nestly.__version__, 'ete3': lambda: ete3.__version__, 'biopython': lambda: Bio.__version__, 'scons': 'scons -v',