Skip to content

Commit

Permalink
Process data in ingest to add url field to metadata [#25]
Browse files Browse the repository at this point in the history
This allows Auspice to automatically link the accession number back to
GenBank in the detailed node popover.
  • Loading branch information
genehack committed Dec 5, 2024
1 parent 5b0d09c commit 3f2e00b
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
1 change: 1 addition & 0 deletions ingest/defaults/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ curate:
- full_authors
- authors
- institution
- url
nextclade:
dataset_name: "nextstrain/yellow-fever/prM-E"
field_map: "defaults/nextclade_field_map.tsv"
Expand Down
19 changes: 18 additions & 1 deletion ingest/rules/curate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ rule curate:
all_geolocation_rules="data/all-geolocation-rules.tsv",
annotations=config["curate"]["annotations"],
output:
metadata="data/all_metadata.tsv",
metadata=temp("data/all_metadata_intermediate.tsv"),
sequences="results/sequences.fasta",
log:
"logs/curate.txt",
Expand Down Expand Up @@ -116,6 +116,23 @@ rule curate:
"""


rule add_genbank_url:
    """
    Append a `url` column to the curated metadata, built by prefixing each
    record's `accession` with the NCBI nuccore base URL.
    """
    input:
        # Plain path on purpose: temp() is an output-side flag, so it belongs
        # on the rule that produces this intermediate file, not on consumers.
        metadata="data/all_metadata_intermediate.tsv",
    output:
        metadata="data/all_metadata.tsv",
    log:
        "logs/add_genbank_url.txt",
    benchmark:
        "benchmarks/add_genbank_url",
    # csvtk flags: -t treats input/output as tab-delimited, -l enables lazy
    # quote parsing, -n names the new column, and -e is the per-row expression
    # whose result fills it. stdout is the new table; stderr goes to the log.
    shell:
        r"""
        csvtk mutate2 -tl \
            -n url \
            -e '"https://www.ncbi.nlm.nih.gov/nuccore/" + $accession' \
            {input.metadata:q} > {output.metadata:q} 2> {log:q}
        """

rule subset_metadata:
input:
metadata="data/all_metadata.tsv",
Expand Down

0 comments on commit 3f2e00b

Please sign in to comment.