Skip to content

Commit

Permalink
Bug repaired
Browse files Browse the repository at this point in the history
  • Loading branch information
cafferychen777 committed Apr 4, 2023
1 parent 359725d commit 94dc89e
Show file tree
Hide file tree
Showing 9 changed files with 287 additions and 357 deletions.
35 changes: 8 additions & 27 deletions R/ggpicrust2.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @param p.adjust A character, the method to adjust p-values
#' @param order A character to control the order of the main plot rows
#' @param p_values_bar A character to control if the main plot has the p_values bar
#' @param x_lab A character to control the x-axis label name
#' @param x_lab A character to control the x-axis label name, you can choose from "feature","pathway_name" and "description"
#' @param select A vector consisting of pathway names to be selected
#' @param reference A character, a reference group level for several DA methods
#' @param colors A vector consisting of colors number
Expand Down Expand Up @@ -49,7 +49,7 @@ ggpicrust2 <- function(file,
p.adjust = "BH",
order = "group",
p_values_bar = TRUE,
x_lab = "pathway_name",
x_lab = NULL,
select = NULL,
reference = NULL,
colors = NULL) {
Expand All @@ -72,42 +72,22 @@ ggpicrust2 <- function(file,
if (sum(as.numeric(daa_results_df$p_adjust <= 0.05)) == 0){
stop("There are no statistically significant biomarkers")
}
if (x_lab == "pathway_name") {
daa_results_df <-
pathway_annotation(daa_results_df = daa_results_df, ko_to_kegg = TRUE)
}
j <- 1
for (i in unique(daa_results_df$method)) {
daa_sub_method_results_df <-
daa_results_df[daa_results_df[, "method"] == i,]
daa_sub_method_results_df_sorted <- data.frame()
if (is.null(select)){
daa_sub_method_results_df_sorted <- daa_sub_method_results_df
}else if (select == "Top 10"){
# Sort daa_sub_method_results_df by p_adjust, from smallest to largest
daa_sub_method_results_df_sorted <- daa_sub_method_results_df[order(daa_sub_method_results_df$p_adjust),]
#Keep the ten smallest records p_adjust
daa_sub_method_results_df_sorted <- daa_sub_method_results_df_sorted[1:10,]
}else if (select == "Top 20"){
# Sort daa_sub_method_results_df by p_adjust, from smallest to largest
daa_sub_method_results_df_sorted <- daa_sub_method_results_df[order(daa_sub_method_results_df$p_adjust),]
# Keep p_adjust minimum of 20 records
daa_sub_method_results_df_sorted <- daa_sub_method_results_df_sorted[1:20,]
}else if (select == "Top 30"){
# Sort daa_sub_method_results_df by p_adjust, from smallest to largest
daa_sub_method_results_df_sorted <- daa_sub_method_results_df[order(daa_sub_method_results_df$p_adjust),]
# Keep p_adjust minimum of 30 records
daa_sub_method_results_df_sorted <- daa_sub_method_results_df_sorted[1:30,]
}
combination_bar_plot <-
pathway_errorbar(
abundance = abundance,
daa_results_df = daa_sub_method_results_df_sorted,
daa_results_df = daa_sub_method_results_df,
Group = metadata[, group],
ko_to_kegg = ko_to_kegg,
p_value_bar = p_values_bar,
order = "pathway_class",
order = order,
colors = colors,
select = select,
x_lab = x_lab
)
# Create a sublist containing a combination_bar_plot and the corresponding subset of daa_results_df
Expand All @@ -126,8 +106,8 @@ ggpicrust2 <- function(file,
escape_double = FALSE,
trim_ws = TRUE
)
abundance <-
tibble::column_to_rownames(abundance, var = "function")
rownames(abundance) <- abundance[,1]
abundance <- abundance[,-1]
daa_results_df <- pathway_daa(
abundance = abundance,
metadata = metadata,
Expand Down Expand Up @@ -156,6 +136,7 @@ ggpicrust2 <- function(file,
p_value_bar = p_values_bar,
order = order,
colors = colors,
select = select,
x_lab = x_lab
)
# Create a sublist
Expand Down
4 changes: 2 additions & 2 deletions R/pathway_daa.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' @param metadata a tibble containing samples information
#' @param group a character specifying the group name for differential abundance analysis
#' @param daa_method a character specifying the method for differential abundance analysis, default is "ALDEx2"
#' @param select a vector containing pathway names for analysis, if NULL all pathways are included, default is NULL
#' @param select a vector containing sample names for analysis, if NULL all samples are included, default is NULL
#' @param p.adjust a character specifying the method for p-value adjustment, default is "BH"
#' @param reference a character specifying the reference group level, required for several differential abundance analysis methods such as LinDA, limme voom and Maaslin2, default is NULL
#'
Expand Down Expand Up @@ -103,7 +103,7 @@ pathway_daa <-
p_values = c(ALDEx2_results$we.ep, ALDEx2_results$wi.ep)
)
} else {
#messgae("ALDEx2 takes a long time to complete the calculation, please wait patiently.")
message("ALDEx2 takes a long time to complete the calculation, please wait patiently.")
ALDEx2_object <-
ALDEx2::aldex.clr(
ALDEx2_abundance,
Expand Down
16 changes: 13 additions & 3 deletions R/pathway_errorbar.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
#'
#' @name pathway_errorbar
#' @param abundance A data frame with row names representing pathways and column names representing samples. Each element represents the relative abundance of the corresponding pathway in the corresponding sample.
#' @param daa_results_df A data frame containing the results of the differential abundance analysis of the pathways, generated by the pathway_daa function.
#' @param daa_results_df A data frame containing the results of the differential abundance analysis of the pathways, generated by the pathway_daa function. x_lab should be a column name of daa_results_df.
#' @param Group A data frame or a vector that assigns each sample to a group. The groups are used to color the samples in the figure.
#' @param ko_to_kegg A logical parameter indicating whether to convert pathway IDs from KEGG orthology (KO) to KEGG pathway IDs. If TRUE, the conversion will be performed.
#' @param ko_to_kegg A logical parameter indicating whether there was a convertion that convert ko abundance to kegg abundance.
#' @param p_values_threshold A numeric parameter specifying the threshold for statistical significance of differential abundance. Pathways with p-values below this threshold will be considered significant.
#' @param order A parameter controlling the ordering of the rows in the figure. The options are: "p_values" (order by p-values), "name" (order by pathway name), "group" (order by the group with the highest mean relative abundance), or "pathway_class" (order by the pathway category).
#' @param select A vector of pathway names to be included in the figure. This can be used to limit the number of pathways displayed. If NULL, all pathways will be displayed.
#' @param p_value_bar A logical parameter indicating whether to display a bar showing the p-value threshold for significance. If TRUE, the bar will be displayed.
#' @param colors A vector of colors to be used to represent the groups in the figure. Each color corresponds to a group.
#' @param x_lab A character string to be used as the x-axis label in the figure. The default value is "description" for KO IDs and "pathway_name" for KEGG pathway IDs.
#' @param x_lab A character string to be used as the x-axis label in the figure. The default value is "description" for KOs'descriptions and "pathway_name" for KEGG pathway names.
#' @importFrom stats sd
#' @value A ggplot2 plot (`plot`)
#' The plot visualizes the differential abundance results of a specific differential abundance analysis method. The corresponding dataframe contains the results used to create the plot.
Expand All @@ -36,6 +36,15 @@ pathway_errorbar <-
}else{
x_lab <- "description"
}
if (is.null(daa_results_df$pathway_name)&is.null(daa_results_df$pathway_name$description)){
message("Please use pathway_annotation to annotate the daa_results_df")
}
}
if (!(x_lab %in% colnames(daa_results_df))){
message("There is no x_lab you defined in daa_results_df")
}
if (nlevels(factor(daa_results_df$method)) != 1) {
message("There are more than one method in daa_results_df$method, please filter it.")
}
if (is.null(colors)) {
colors <- c("#d93c3e", "#3685bc", "#6faa3e", "#e8a825", "#c973e6", "#ee6b3d", "#2db0a7", "#f25292")[1:nlevels(as.factor(Group))]
Expand Down Expand Up @@ -206,6 +215,7 @@ pathway_errorbar <-
legend.box.just = "right",
plot.margin = ggplot2::margin(0, 0.5, 0.5, 0, unit = "cm")
) + ggplot2::coord_cartesian(clip = "off")

if (ko_to_kegg == TRUE) {
pathway_class_group <-
daa_results_filtered_sub_df$pathway_class %>%
Expand Down
132 changes: 129 additions & 3 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ ggpicrust2() integrates ko abundance to kegg pathway abundance conversion, annot

![](https://github.moeyy.xyz/https://raw.githubusercontent.com/cafferychen777/ggpicrust2_paper/main/paper_figure/workflow_2.png)

### ggpicrust2()

```{r ggpicrust2(), eval = FALSE}
#If you want to analysis kegg pathway abundance instead of ko within the pathway. You should turn ko_to_kegg to TRUE.
#The kegg pathway typically have the more explainable description.
Expand All @@ -110,9 +112,10 @@ daa_results_list <-
ko_to_kegg = TRUE,
order = "pathway_class"
select = NULL,
reference = NULL
reference = NULL # If your metadata[,group] has more than two levels, please specify a reference.
)
#The visualization will be published in viewer.
#If you want to analysis the EC. MetaCyc. KO without conversions. You should turn ko_to_kegg to FALSE.
metadata <-
Expand All @@ -138,7 +141,130 @@ daa_results_list <-
select = NULL,
reference = NULL
)
#The visualization will be published in viewer.
```

### If an error occurs with ggpicrust2, please use the following workflow.

```{r alternative, eval = FALSE}
#If you want to analysis kegg pathway abundance instead of ko within the pathway. You should turn ko_to_kegg to TRUE.
#The kegg pathway typically have the more explainable description.
#metadata should be tibble.
metadata <-
read_delim(
"~/Microbiome/C9orf72/Code And Data/new_metadata.txt",
delim = "\t",
escape_double = FALSE,
trim_ws = TRUE
)
kegg_abundance <-
ko2kegg_abundance(
"/Users/apple/Downloads/pred_metagenome_unstrat.tsv/pred_metagenome_unstrat.tsv"
)
group <- "Enviroment"
daa_results_df <-
pathway_daa(
abundance = kegg_abundance,
metadata = metadata,
group = group,
daa_method = "ALDEx2",
select = NULL,
reference = NULL
)
#if you are using LinDA, limme voom and Maaslin2, please specify a reference just like followings code.
# daa_results_df <- pathway_daa(abundance = kegg_abundance, metadata = metadata, group = group, daa_method = "ALDEx2", select = NULL, reference = "Harvard BRI")
daa_sub_method_results_df <-
daa_results_df[daa_results_df$method == "ALDEx2_Kruskal-Wallace test", ]
daa_annotated_sub_method_results_df <-
pathway_annotation(pathway = "KO",
daa_results_df = daa_sub_method_results_df,
ko_to_kegg = TRUE)
Group <-
metadata$Enviroment # column which you are interested in metadata
# select parameter format in pathway_error() is c("ko00562", "ko00440", "ko04111", "ko05412", "ko00310", "ko04146", "ko00600", "ko04142", "ko00604", "ko04260", "ko04110", "ko04976", "ko05222", "ko05416", "ko00380", "ko05322", "ko00625", "ko00624", "ko00626", "ko00621")
daa_results_list <-
pathway_errorbar(
abundance = kegg_abundance,
daa_results_df = daa_annotated_sub_method_results_df,
Group = Group,
p_values_threshold = 0.05,
order = "pathway_class",
select = NULL,
ko_to_kegg = TRUE,
p_value_bar = TRUE,
colors = NULL,
x_lab = "pathway_name"
)
# x_lab can be set "feature" which represents kegg pathway id.
# If you want to analysis the EC. MetaCyc. KO without conversions. You should turn ko_to_kegg to FALSE.
metadata <-
read_delim(
"~/Microbiome/C9orf72/Code And Data/new_metadata.txt",
delim = "\t",
escape_double = FALSE,
trim_ws = TRUE
)
#ko_abundance should be data.frame instead of tibble
#ID should be in the first column
ko_abundance <-
read.delim(
"/Users/apple/Downloads/pred_metagenome_unstrat.tsv/pred_metagenome_unstrat.tsv"
)
#sometimes there are function, pathway or something else. Change function. when needed
rownames(ko_abundance) <- ko_abundance$function.
ko_abundance <- ko_abundance[, -1]
group <- "Enviroment"
daa_results_df <-
pathway_daa(
abundance = ko_abundance,
metadata = metadata,
group = group,
daa_method = "ALDEx2",
select = NULL,
reference = NULL
)
#if you are using LinDA, limme voom and Maaslin2, please specify a reference just like followings code.
# daa_results_df <- pathway_daa(abundance = kegg_abundance, metadata = metadata, group = group, daa_method = "ALDEx2", select = NULL, reference = "Harvard BRI")
daa_sub_method_results_df <-
daa_results_df[daa_results_df$method == "ALDEx2_Kruskal-Wallace test", ]
daa_annotated_sub_method_results_df <-
pathway_annotation(pathway = "KO",
daa_results_df = daa_sub_method_results_df,
ko_to_kegg = FALSE)
Group <-
metadata$Enviroment # column which you are interested in metadata
# select parameter format in pathway_error() is c("K00001", "K00002", "K00003", "K00004")
daa_results_list <-
pathway_errorbar(
abundance = ko_abundance,
daa_results_df = daa_annotated_sub_method_results_df,
Group = Group,
p_values_threshold = 0.05,
order = "pathway_class",
select = NULL,
ko_to_kegg = FALSE,
p_value_bar = TRUE,
colors = NULL,
x_lab = "description"
)
```

## Output {#output}
Expand Down
Loading

0 comments on commit 94dc89e

Please sign in to comment.