Bug repaired

cafferychen777 · Apr 4, 2023 · 94dc89e · 94dc89e
1 parent 359725d
commit 94dc89e
Show file tree

Hide file tree

Showing 9 changed files with 287 additions and 357 deletions.
diff --git a/R/ggpicrust2.R b/R/ggpicrust2.R
@@ -9,7 +9,7 @@
 #' @param p.adjust A character, the method to adjust p-values
 #' @param order A character to control the order of the main plot rows
 #' @param p_values_bar A character to control if the main plot has the p_values bar
-#' @param x_lab A character to control the x-axis label name
+#' @param x_lab A character to control the x-axis label name, you can choose from "feature","pathway_name" and "description"
 #' @param select A vector consisting of pathway names to be selected
 #' @param reference A character, a reference group level for several DA methods
 #' @param colors A vector consisting of colors number
@@ -49,7 +49,7 @@ ggpicrust2 <- function(file,
                        p.adjust = "BH",
                        order = "group",
                        p_values_bar = TRUE,
-                       x_lab = "pathway_name",
+                       x_lab = NULL,
                        select = NULL,
                        reference = NULL,
                        colors = NULL) {
@@ -72,42 +72,22 @@ ggpicrust2 <- function(file,
            if (sum(as.numeric(daa_results_df$p_adjust <= 0.05)) == 0){
              stop("There are no statistically significant biomarkers")
            }
-           if (x_lab == "pathway_name") {
              daa_results_df  <-
                pathway_annotation(daa_results_df = daa_results_df, ko_to_kegg = TRUE)
-           }
            j <- 1
            for (i in unique(daa_results_df$method)) {
              daa_sub_method_results_df <-
                daa_results_df[daa_results_df[, "method"] == i,]
-             daa_sub_method_results_df_sorted <- data.frame()
-             if (is.null(select)){
-               daa_sub_method_results_df_sorted <- daa_sub_method_results_df
-             }else if (select == "Top 10"){
-               # Sort daa_sub_method_results_df by p_adjust, from smallest to largest
-               daa_sub_method_results_df_sorted <- daa_sub_method_results_df[order(daa_sub_method_results_df$p_adjust),]
-               #Keep the ten smallest records p_adjust
-               daa_sub_method_results_df_sorted <- daa_sub_method_results_df_sorted[1:10,]
-             }else if (select == "Top 20"){
-               # Sort daa_sub_method_results_df by p_adjust, from smallest to largest
-               daa_sub_method_results_df_sorted <- daa_sub_method_results_df[order(daa_sub_method_results_df$p_adjust),]
-               # Keep p_adjust minimum of 20 records
-               daa_sub_method_results_df_sorted <- daa_sub_method_results_df_sorted[1:20,]
-             }else if (select == "Top 30"){
-               # Sort daa_sub_method_results_df by p_adjust, from smallest to largest
-               daa_sub_method_results_df_sorted <- daa_sub_method_results_df[order(daa_sub_method_results_df$p_adjust),]
-               # Keep p_adjust minimum of 30 records
-               daa_sub_method_results_df_sorted <- daa_sub_method_results_df_sorted[1:30,]
-             }
              combination_bar_plot <-
                pathway_errorbar(
                  abundance = abundance,
-                 daa_results_df = daa_sub_method_results_df_sorted,
+                 daa_results_df = daa_sub_method_results_df,
                  Group = metadata[, group],
                  ko_to_kegg = ko_to_kegg,
                  p_value_bar = p_values_bar,
-                 order = "pathway_class",
+                 order = order,
                  colors = colors,
+                 select = select,
                  x_lab = x_lab
                )
              # Create a sublist containing a combination_bar_plot and the corresponding subset of daa_results_df
@@ -126,8 +106,8 @@ ggpicrust2 <- function(file,
              escape_double = FALSE,
              trim_ws = TRUE
            )
-           abundance <-
-             tibble::column_to_rownames(abundance, var = "function")
+           rownames(abundance) <- abundance[,1]
+           abundance <- abundance[,-1]
            daa_results_df <- pathway_daa(
              abundance = abundance,
              metadata = metadata,
@@ -156,6 +136,7 @@ ggpicrust2 <- function(file,
                  p_value_bar = p_values_bar,
                  order = order,
                  colors = colors,
+                 select = select,
                  x_lab = x_lab
                )
              # Create a sublist

diff --git a/R/pathway_daa.R b/R/pathway_daa.R
@@ -4,7 +4,7 @@
 #' @param metadata a tibble containing samples information
 #' @param group a character specifying the group name for differential abundance analysis
 #' @param daa_method a character specifying the method for differential abundance analysis, default is "ALDEx2"
-#' @param select a vector containing pathway names for analysis, if NULL all pathways are included, default is NULL
+#' @param select a vector containing sample names for analysis, if NULL all samples are included, default is NULL
 #' @param p.adjust a character specifying the method for p-value adjustment, default is "BH"
 #' @param reference a character specifying the reference group level, required for several differential abundance analysis methods such as LinDA, limme voom and Maaslin2, default is NULL
 #'
@@ -103,7 +103,7 @@ pathway_daa <-
               p_values = c(ALDEx2_results$we.ep, ALDEx2_results$wi.ep)
             )
         } else {
-          #messgae("ALDEx2 takes a long time to complete the calculation, please wait patiently.")
+          message("ALDEx2 takes a long time to complete the calculation, please wait patiently.")
           ALDEx2_object <-
             ALDEx2::aldex.clr(
               ALDEx2_abundance,

diff --git a/R/pathway_errorbar.R b/R/pathway_errorbar.R
@@ -2,15 +2,15 @@
 #'
 #' @name pathway_errorbar
 #' @param abundance A data frame with row names representing pathways and column names representing samples. Each element represents the relative abundance of the corresponding pathway in the corresponding sample.
-#' @param daa_results_df A data frame containing the results of the differential abundance analysis of the pathways, generated by the pathway_daa function.
+#' @param daa_results_df A data frame containing the results of the differential abundance analysis of the pathways, generated by the pathway_daa function. x_lab should be a column name of daa_results_df.
 #' @param Group A data frame or a vector that assigns each sample to a group. The groups are used to color the samples in the figure.
-#' @param ko_to_kegg A logical parameter indicating whether to convert pathway IDs from KEGG orthology (KO) to KEGG pathway IDs. If TRUE, the conversion will be performed.
+#' @param ko_to_kegg A logical parameter indicating whether there was a convertion that convert ko abundance to kegg abundance.
 #' @param p_values_threshold A numeric parameter specifying the threshold for statistical significance of differential abundance. Pathways with p-values below this threshold will be considered significant.
 #' @param order A parameter controlling the ordering of the rows in the figure. The options are: "p_values" (order by p-values), "name" (order by pathway name), "group" (order by the group with the highest mean relative abundance), or "pathway_class" (order by the pathway category).
 #' @param select A vector of pathway names to be included in the figure. This can be used to limit the number of pathways displayed. If NULL, all pathways will be displayed.
 #' @param p_value_bar A logical parameter indicating whether to display a bar showing the p-value threshold for significance. If TRUE, the bar will be displayed.
 #' @param colors A vector of colors to be used to represent the groups in the figure. Each color corresponds to a group.
-#' @param x_lab A character string to be used as the x-axis label in the figure. The default value is "description" for KO IDs and "pathway_name" for KEGG pathway IDs.
+#' @param x_lab A character string to be used as the x-axis label in the figure. The default value is "description" for KOs'descriptions and "pathway_name" for KEGG pathway names.
 #' @importFrom stats sd
 #' @value A ggplot2 plot (`plot`)
 #' The plot visualizes the differential abundance results of a specific differential abundance analysis method. The corresponding dataframe contains the results used to create the plot.
@@ -36,6 +36,15 @@ pathway_errorbar <-
       }else{
         x_lab <- "description"
       }
+      if (is.null(daa_results_df$pathway_name)&is.null(daa_results_df$pathway_name$description)){
+        message("Please use pathway_annotation to annotate the daa_results_df")
+      }
+    }
+    if (!(x_lab %in% colnames(daa_results_df))){
+      message("There is no x_lab you defined in daa_results_df")
+    }
+    if (nlevels(factor(daa_results_df$method)) != 1) {
+      message("There are more than one method in daa_results_df$method, please filter it.")
     }
     if (is.null(colors)) {
       colors <- c("#d93c3e", "#3685bc", "#6faa3e", "#e8a825", "#c973e6", "#ee6b3d", "#2db0a7", "#f25292")[1:nlevels(as.factor(Group))]
@@ -206,6 +215,7 @@ pathway_errorbar <-
         legend.box.just = "right",
         plot.margin = ggplot2::margin(0, 0.5, 0.5, 0, unit = "cm")
       ) + ggplot2::coord_cartesian(clip = "off")
+
     if (ko_to_kegg == TRUE) {
       pathway_class_group <-
         daa_results_filtered_sub_df$pathway_class %>%

diff --git a/README.Rmd b/README.Rmd
@@ -87,6 +87,8 @@ ggpicrust2() integrates ko abundance to kegg pathway abundance conversion, annot
 
 ![](https://github.moeyy.xyz/https://raw.githubusercontent.com/cafferychen777/ggpicrust2_paper/main/paper_figure/workflow_2.png)
 
+### ggpicrust2()
+
 ```{r ggpicrust2(), eval = FALSE}
 #If you want to analysis kegg pathway abundance instead of ko within the pathway. You should turn ko_to_kegg to TRUE.
 #The kegg pathway typically have the more explainable description.
@@ -110,9 +112,10 @@ daa_results_list <-
     ko_to_kegg = TRUE,
     order = "pathway_class"
     select = NULL,
-    reference = NULL
+    reference = NULL # If your metadata[,group] has more than two levels, please specify a reference.
   )
-#The visualization will be published in viewer.
+
+
 
 #If you want to analysis the EC. MetaCyc. KO without conversions. You should turn ko_to_kegg to FALSE.
 metadata <-
@@ -138,7 +141,130 @@ daa_results_list <-
     select = NULL,
     reference = NULL
   )
-#The visualization will be published in viewer.
+```
+
+### If an error occurs with ggpicrust2, please use the following workflow. 
+
+```{r alternative, eval = FALSE}
+#If you want to analysis kegg pathway abundance instead of ko within the pathway. You should turn ko_to_kegg to TRUE.
+#The kegg pathway typically have the more explainable description.
+#metadata should be tibble.
+metadata <-
+  read_delim(
+    "~/Microbiome/C9orf72/Code And Data/new_metadata.txt",
+    delim = "\t",
+    escape_double = FALSE,
+    trim_ws = TRUE
+  )
+
+kegg_abundance <-
+  ko2kegg_abundance(
+    "/Users/apple/Downloads/pred_metagenome_unstrat.tsv/pred_metagenome_unstrat.tsv"
+  )
+
+group <- "Enviroment"
+
+daa_results_df <-
+  pathway_daa(
+    abundance = kegg_abundance,
+    metadata = metadata,
+    group = group,
+    daa_method = "ALDEx2",
+    select = NULL,
+    reference = NULL
+  )
+
+#if you are using LinDA, limme voom and Maaslin2, please specify a reference just like followings code.
+# daa_results_df <- pathway_daa(abundance = kegg_abundance, metadata = metadata, group = group, daa_method = "ALDEx2", select = NULL, reference = "Harvard BRI")
+
+daa_sub_method_results_df <-
+  daa_results_df[daa_results_df$method == "ALDEx2_Kruskal-Wallace test", ]
+
+daa_annotated_sub_method_results_df <-
+  pathway_annotation(pathway = "KO",
+                     daa_results_df = daa_sub_method_results_df,
+                     ko_to_kegg = TRUE)
+
+Group <-
+  metadata$Enviroment # column which you are interested in metadata
+
+# select parameter format in pathway_error() is c("ko00562", "ko00440", "ko04111", "ko05412", "ko00310", "ko04146", "ko00600", "ko04142", "ko00604", "ko04260", "ko04110", "ko04976", "ko05222", "ko05416", "ko00380", "ko05322", "ko00625", "ko00624", "ko00626", "ko00621")
+
+daa_results_list <-
+  pathway_errorbar(
+    abundance = kegg_abundance,
+    daa_results_df = daa_annotated_sub_method_results_df,
+    Group = Group,
+    p_values_threshold = 0.05,
+    order = "pathway_class",
+    select = NULL,
+    ko_to_kegg = TRUE,
+    p_value_bar = TRUE,
+    colors = NULL,
+    x_lab = "pathway_name"
+  )
+# x_lab can be set "feature" which represents kegg pathway id.
+
+# If you want to analysis the EC. MetaCyc. KO without conversions. You should turn ko_to_kegg to FALSE.
+metadata <-
+  read_delim(
+    "~/Microbiome/C9orf72/Code And Data/new_metadata.txt",
+    delim = "\t",
+    escape_double = FALSE,
+    trim_ws = TRUE
+  )
+
+#ko_abundance should be data.frame instead of tibble
+#ID should be in the first column
+ko_abundance <-
+  read.delim(
+    "/Users/apple/Downloads/pred_metagenome_unstrat.tsv/pred_metagenome_unstrat.tsv"
+  )
+#sometimes there are function, pathway or something else. Change function. when needed
+rownames(ko_abundance) <- ko_abundance$function.
+ko_abundance <- ko_abundance[, -1]
+
+group <- "Enviroment"
+
+daa_results_df <-
+  pathway_daa(
+    abundance = ko_abundance,
+    metadata = metadata,
+    group = group,
+    daa_method = "ALDEx2",
+    select = NULL,
+    reference = NULL
+  )
+
+#if you are using LinDA, limme voom and Maaslin2, please specify a reference just like followings code.
+# daa_results_df <- pathway_daa(abundance = kegg_abundance, metadata = metadata, group = group, daa_method = "ALDEx2", select = NULL, reference = "Harvard BRI")
+
+daa_sub_method_results_df <-
+  daa_results_df[daa_results_df$method == "ALDEx2_Kruskal-Wallace test", ]
+
+daa_annotated_sub_method_results_df <-
+  pathway_annotation(pathway = "KO",
+                     daa_results_df = daa_sub_method_results_df,
+                     ko_to_kegg = FALSE)
+
+Group <-
+  metadata$Enviroment # column which you are interested in metadata
+
+# select parameter format in pathway_error() is c("K00001", "K00002", "K00003", "K00004")
+
+daa_results_list <-
+  pathway_errorbar(
+    abundance = ko_abundance,
+    daa_results_df = daa_annotated_sub_method_results_df,
+    Group = Group,
+    p_values_threshold = 0.05,
+    order = "pathway_class",
+    select = NULL,
+    ko_to_kegg = FALSE,
+    p_value_bar = TRUE,
+    colors = NULL,
+    x_lab = "description"
+  )
 ```
 
 ## Output {#output}