-
Notifications
You must be signed in to change notification settings - Fork 2
/
sourmash_gather_to_metacoder_plot.R
executable file
·53 lines (44 loc) · 2.58 KB
/
sourmash_gather_to_metacoder_plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#! /usr/bin/env Rscript
# from https://www.r-bloggers.com/2015/09/passing-arguments-to-an-r-script-from-command-lines/
args = commandArgs(trailingOnly=TRUE)
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input gather CSV)", call.=FALSE)
} else if (length(args)==1) {
# default output file
args[2] = "metacoder_gather.png"
}
# from https://github.com/sourmash-bio/sourmash/issues/2041
library(dplyr)
library(readr)
library(metacoder)
# read in gdtb rs207 lineage/taxonomy information and format
# csv available here: https://osf.io/p6z3w/
gtdb_lineages <- read_csv("gtdb-rs207.taxonomy.csv.gz") %>% # read in gtdb rs207 lineage information
mutate(full_lineage = paste(sep = ";", superkingdom, phylum, class, order, family, genus, species)) %>% # format lineage into single string
select(ident, full_lineage) # keep columns that will be used by metacoder
# csv generated by running sourmash gather on e.g. a metagenome
gather_results <- read_csv(args[1], col_types = "ddddddddcccddddcccd") %>% # read in gather results
mutate(ident = gsub(" .*", "", name)) %>% # format name string to ident
left_join(gtdb_lineages, by = "ident") %>% # join to lineage information
select(ident, full_lineage, f_unique_weighted) # keep columns that will be used by metacoder
# parse gather and gtdb information into a metacoder object
obj <- parse_tax_data(gather_results,
class_cols = "full_lineage", # the column that contains taxonomic information
class_sep = ";", # The character used to separate taxa in the classification
class_regex = "^(.+)__(.+)$", # Regex identifying where the data for each taxon is
class_key = c(tax_rank = "info", # A key describing each regex capture group
tax_name = "taxon_name"))
# set varimp at taxon level instead of just species level
# note: We only use "f_unique_weighted". Any numeric data could be included.
# It would need to be added below, to the cols parameter, and above in the
# select() function.
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = c("f_unique_weighted"))
heat_tree(obj,
node_label = taxon_names,
node_size = obj$data$tax_abund$f_unique_weighted,
node_color = obj$data$tax_abund$f_unique_weighted,
#node_label_size_range = c(0.01, 0.03), # uncomment to finely control text size
node_color_axis_label = "color",
node_size_axis_label = "size",
output_file = args[2])