Skip to content

Commit

Permalink
Merge pull request #168 from picrust/v2.4.0
Browse files Browse the repository at this point in the history
V2.4.0
  • Loading branch information
gavinmdouglas authored Mar 29, 2021
2 parents 8a2e3df + 24b0318 commit 0d3150c
Show file tree
Hide file tree
Showing 44 changed files with 40,545 additions and 206 deletions.
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# PICRUSt2

**Please note that PICRUSt2 is currently in beta**.

See the [PICRUSt2 wiki](https://github.com/picrust/picrust2/wiki) for the documentation.
Pleae see the [PICRUSt2 wiki](https://github.com/picrust/picrust2/wiki) for the documentation and tutorials.

Note that this repository contains the source code for MinPath, which is distributed under the GNU General Public License.

19 changes: 10 additions & 9 deletions picrust2-env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,20 @@ channels:
- defaults

dependencies:
- biom-format >=2.1.7
- epa-ng=0.3.6
- gappa=0.5.1
- biom-format >=2.1.10
- epa-ng=0.3.8
- gappa=0.7.0
- glpk=4.65
- h5py >=2.9.0
- h5py >=3.1.0
- hmmer >=3.1b2,<=3.2.1
- jinja2 >=2.10.1
- joblib >=0.13.1
- numpy >=1.16.2
- pandas >=0.24.2,<=0.25.2
- jinja2 >=2.11.3
- joblib >=1.0.1
- numpy >=1.19.5
- pandas >=1.1.5
- pytest >=4.4.1
- pytest-cov >=2.6.1
- python >=3.5,<3.7
- r-base >=3.5.1
- r-castor=1.5.0
- r-castor=1.6.5
- scipy >=1.2.1
- sepp=4.3.10
57 changes: 48 additions & 9 deletions picrust2/Rscripts/castor_hsp.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ Args <- commandArgs(TRUE)
full_tree <- read_tree(file=Args[1], check_label_uniqueness = TRUE)
trait_values <- read.delim(Args[2], check.names=FALSE, row.names=1)
hsp_method <- Args[3]
calc_ci <- as.logical(Args[4])
check_input_set <- as.logical(Args[5])
predict_outfile <- Args[6]
ci_outfile <- Args[7]
seed_setting <- Args[8]
edge_exponent_set <- as.numeric(Args[4])
calc_ci <- as.logical(Args[5])
check_input_set <- as.logical(Args[6])
predict_outfile <- Args[7]
ci_outfile <- Args[8]
seed_setting <- Args[9]

# Set random seed if integer specified.
if(seed_setting != "None") {
Expand Down Expand Up @@ -67,7 +68,7 @@ mp_study_probs <- function(in_trait, in_tree ,unknown_i, check_input) {
tip_states = in_trait,
check_input=check_input,
transition_costs = "proportional",
edge_exponent=0.5,
edge_exponent=edge_exponent_set,
weight_by_scenarios = TRUE)

return(get_sorted_prob(mp_hsp_out$likelihoods,
Expand All @@ -89,14 +90,43 @@ emp_prob_study_probs <- function(in_trait, in_tree, unknown_i, check_input) {
tree_tips=in_tree$tip.label))
}


# First check if any tip labels are present in the trait table but do not match
# because they have quotes around them.
if(length(grep("\"", full_tree$tip.label) > 0) || length(grep("\'", full_tree$tip.label) > 0)) {

tmp_unknown_tips_index <- which(! full_tree$tip.label %in% rownames(trait_values))
tmp_unknown_tips <- full_tree$tip.label[tmp_unknown_tips_index]

unknown_labels_no_quotes <- gsub("\'", "", tmp_unknown_tips)
unknown_labels_no_quotes <- gsub("\"", "", unknown_labels_no_quotes)

no_quote_matches = which(unknown_labels_no_quotes %in% rownames(trait_values))

# Remove quotes from around tips where matches only occur when quotes are removed.
if(length(no_quote_matches) > 0) {
indices_to_change <- tmp_unknown_tips_index[no_quote_matches]
full_tree$tip.label[indices_to_change] <- unknown_labels_no_quotes[no_quote_matches]
}

cat(paste("\nWhile running castor_hsp.R: Fixed ", length(no_quote_matches),
" tip label(s) that had quotations around them that stopped them from matching the function abundance table.",
sep=""))
}

# Order the trait table to match the tree tip labels. Set all tips without a value to be NA.
unknown_tips_index <- which(! full_tree$tip.label %in% rownames(trait_values))
unknown_tips <- full_tree$tip.label[unknown_tips_index]
num_unknown = length(unknown_tips)
num_unknown <- length(unknown_tips)
num_known <- length(full_tree$tip.label) - num_unknown

# Throw error if all tips are unknown.
if(num_unknown == length(full_tree$tip.label)) {
stop("None of the reference ids within the function abundance table are found within the input tree. This can occur when malformed or mismatched custom reference files are used.")

# Check if only very few tips are "known".
} else if(num_known / nrow(trait_values) < 0.1) {
stop("Fewer than 10% of reference ids in the function abundance table are in the tree. This could be because the ids are slightly different between the table and the tree.")
}

unknown_df <- as.data.frame(matrix(NA,
Expand All @@ -119,7 +149,7 @@ trait_values <- trait_values[full_tree$tip.label, , drop=FALSE]

num_tip <- nrow(trait_values)

if (hsp_method == "pic" | hsp_method == "scp" | hsp_method == "subtree_average") {
if (hsp_method == "pic" || hsp_method == "scp" || hsp_method == "subtree_average") {

if (hsp_method == "pic") {
predict_out <- lapply(trait_values,
Expand Down Expand Up @@ -150,7 +180,7 @@ if (hsp_method == "pic" | hsp_method == "scp" | hsp_method == "subtree_average")
remove(predict_out)
invisible(gc(verbose = FALSE))

} else if(hsp_method == "emp_prob" | hsp_method == "mp") {
} else if(hsp_method == "emp_prob" || hsp_method == "mp") {

# Add 1 to all input counts because because traits states need to start at 1.
trait_values <- trait_values + 1
Expand Down Expand Up @@ -211,5 +241,14 @@ predicted_values <- data.frame(predicted_values, check.names = FALSE)
predicted_values$sequence <- unknown_tips
predicted_values <- predicted_values[, c("sequence", colnames(trait_values))]

# Check to see if there are any columns that are totally missing.
missing_values_by_column <- colSums(is.na(predicted_values))

if(length(which(missing_values_by_column == nrow(predicted_values))) > 0) {
stop("\nError - at least one trait in the prediction table was entirely missing values.")
} else if(length(which(missing_values_by_column > 0)) > 0) {
cat("\nWarning: there are missing values in the output prediction table.")
}

# Write out predicted values.
write.table(predicted_values, file=predict_outfile, row.names=FALSE, quote=FALSE, sep="\t")
4 changes: 2 additions & 2 deletions picrust2/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python

__author__ = "The PICRUSt Development Team"
__copyright__ = "Copyright 2018-2020, PICRUSt Project"
__copyright__ = "Copyright 2018-2021, PICRUSt Project"
__license__ = "GPL"
__url__ = "https://github.com/picrust/picrust2"
__version__ = "2.3.0-b"
__version__ = "2.4.0"
8 changes: 5 additions & 3 deletions picrust2/default.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env python

__copyright__ = "Copyright 2018-2020, The PICRUSt Project"
__copyright__ = "Copyright 2018-2021, The PICRUSt Project"
__license__ = "GPL"
__version__ = "2.3.0-b"
__version__ = "2.4.0"

from os import path

Expand All @@ -11,14 +11,16 @@
default_ref_dir = path.join(project_dir, "default_files", "prokaryotic",
"pro_ref")

default_fasta = path.join(default_ref_dir, "pro_ref.fna.gz")
default_fasta = path.join(default_ref_dir, "pro_ref.fna")

default_tree = path.join(default_ref_dir, "pro_ref.tre")

default_hmm = path.join(default_ref_dir, "pro_ref.hmm")

default_model = path.join(default_ref_dir, "pro_ref.model")

default_raxml_info = path.join(default_ref_dir, "pro_ref.raxml_info")

default_regroup_map = path.join(project_dir, "default_files",
"pathway_mapfiles",
"ec_level4_to_metacyc_rxn.tsv")
Expand Down
Loading

0 comments on commit 0d3150c

Please sign in to comment.