From e23df0dac00a4be4b705eee901a018d9f68e30ca Mon Sep 17 00:00:00 2001 From: dtopouza Date: Tue, 12 Nov 2024 16:52:03 -0500 Subject: [PATCH 01/11] Add initial function and tests for formatting of olink_normalization_product output --- OlinkAnalyze/R/olink_normalization_product.R | 69 ++++++++++++++ .../man/olink_normalization_product_format.Rd | 51 ++++++++++ .../test-olink_normalization_product.R | 94 +++++++++++++++++++ 3 files changed, 214 insertions(+) create mode 100644 OlinkAnalyze/man/olink_normalization_product_format.Rd diff --git a/OlinkAnalyze/R/olink_normalization_product.R b/OlinkAnalyze/R/olink_normalization_product.R index ac732eef..40f6da61 100644 --- a/OlinkAnalyze/R/olink_normalization_product.R +++ b/OlinkAnalyze/R/olink_normalization_product.R @@ -640,3 +640,72 @@ olink_normalization_qs <- function(lst_df, return(df_qq_norm) } + + + +#' Formatting the output of olink_normalization_product for seamless use with +#' downstream OA functions. +#' +#' @author +#' Danai G. Topouza +#' +#' @description +#' Removes non-bridgeable assays. Replaces the NPX values of the non-reference +#' project by the Median Centered or QS Normalized NPX, according to the +#' Bridging Recommendation. Replaces OlinkID by the concatenation of the +#' Explore HT and Explore 3072 OlinkIDs. Removes columns BridgingRecommendation, +#' MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072. +#' +#' @param df A "tibble" of Olink data in long format resulting from the +#' olink_normalization_product function. +#' +#' @return A "tibble" of Olink data in long format containing both input +#' datasets with the quantile normalized quantifications, with the above +#' modifications. 
+#' +#' @examples +#' \donttest{ +#' # Bridge samples +#' bridge_samples <- intersect( +#' x = unique(OlinkAnalyze:::data_ht_small$SampleID), +#' y = unique(OlinkAnalyze:::data_3k_small$SampleID) +#' ) |> +#' (\(x) x[!grepl("CONTROL", x)])() +#' +#' # Run olink_normalization_product +#' npx_br_data <- olink_normalization( +#' df1 = OlinkAnalyze:::data_ht_small, +#' df2 = OlinkAnalyze:::data_3k_small, +#' overlapping_samples_df1 = bridge_samples, +#' df1_project_nr = "Explore HT", +#' df2_project_nr = "Explore 3072", +#' reference_project = "Explore HT") +#' +#' # Format output +#' npx_br_data_format <- OlinkAnalyze:::olink_normalization_product_format(npx_br_data) +#' +#' } + +olink_normalization_product_format <- function(df) { + + ### Keep the data following BridgingRecommendation + df_format <- df |> + dplyr::filter(.data[["BridgingRecommendation"]] != "NotBridgeable") |> + dplyr::mutate(NPX = case_when( + .data[["BridgingRecommendation"]] == "MedianCentering" ~ + .data[["MedianCenteredNPX"]], + .data[["BridgingRecommendation"]] == "QuantileSmoothing" ~ + .data[["QSNormalizedNPX"]], + .default = .data[["NPX"]])) |> + dplyr::filter(.data[["AssayType"]] == "assay") |> + dplyr::mutate(OlinkID = paste0(.data[["OlinkID"]], + "_", + .data[["OlinkID_E3072"]])) |> + dplyr::select(!c(.data[["BridgingRecommendation"]], + .data[["MedianCenteredNPX"]], + .data[["QSNormalizedNPX"]], + .data[["OlinkID_E3072"]]))# Remove extra columns + + return(df_format) +} + diff --git a/OlinkAnalyze/man/olink_normalization_product_format.Rd b/OlinkAnalyze/man/olink_normalization_product_format.Rd new file mode 100644 index 00000000..bd15a9ca --- /dev/null +++ b/OlinkAnalyze/man/olink_normalization_product_format.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/olink_normalization_product.R +\name{olink_normalization_product_format} +\alias{olink_normalization_product_format} +\title{Formatting the output of olink_normalization_product for 
seamless use with +downstream OA functions.} +\usage{ +olink_normalization_product_format(df) +} +\arguments{ +\item{df}{A "tibble" of Olink data in long format resulting from the +olink_normalization_product function.} +} +\value{ +A "tibble" of Olink data in long format containing both input +datasets with the quantile normalized quantifications, with the above +modifications. +} +\description{ +Removes non-bridgeable assays. Replaces the NPX values of the non-reference +project by the Median Centered or QS Normalized NPX, according to the +Bridging Recommendation. Replaces OlinkID by the concatenation of the +Explore HT and Explore 3072 OlinkIDs. Removes columns BridgingRecommendation, +MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072. +} +\examples{ +\donttest{ +# Bridge samples +bridge_samples <- intersect( + x = unique(OlinkAnalyze:::data_ht_small$SampleID), + y = unique(OlinkAnalyze:::data_3k_small$SampleID) +) |> + (\(x) x[!grepl("CONTROL", x)])() + +# Run olink_normalization_product +npx_br_data <- olink_normalization( +df1 = OlinkAnalyze:::data_ht_small, +df2 = OlinkAnalyze:::data_3k_small, +overlapping_samples_df1 = bridge_samples, +df1_project_nr = "Explore HT", +df2_project_nr = "Explore 3072", +reference_project = "Explore HT") + +# Format output +npx_br_data_format <- OlinkAnalyze:::olink_normalization_product_format(npx_br_data) + +} +} +\author{ +Danai G. 
Topouza +} diff --git a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R index bb0bc515..eb7c6e76 100644 --- a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R +++ b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R @@ -444,3 +444,97 @@ test_that( ) } ) + + +# Test olink_normalization_product_format ---- + +test_that( + "olink_normalization_product_format - works", + { + data_3k <- get_example_data(filename = "example_3k_data.rds") + data_ht <- get_example_data(filename = "example_HT_data.rds") + + expect_message(expect_warning( + object = norm_br_data <- olink_normalization( + df1 = data_3k, + df2 = data_ht, + overlapping_samples_df1 = intersect( + x = unique(data_3k$SampleID), + y = unique(data_ht$SampleID) + ) |> + (\(x) x[!grepl("CONTROL", x)])(), + overlapping_samples_df2 = NULL, + df1_project_nr = "P1", + df2_project_nr = "P2", + reference_project = "P2", + reference_medians = NULL + ), + regexp = "2 assays are not shared across products."), + regexp = "Cross-product normalization will be performed!" 
+ ) + + + # Format output + norm_br_data_format <- olink_normalization_product_format(norm_br_data) + + ## check that correct columns are removed + expect_equal( + object = length(intersect(colnames(norm_br_data_format), + c("BridgingRecommendation", "MedianCenteredNPX", + "QSNormalizedNPX", "OlinkID_E3072"))), + expected = 0L + ) + + ## check that NotBridgeable assays are removed + not_bridgeable_assays <- norm_br_data |> + dplyr::filter(.data[["BridgingRecommendation"]] == "NotBridgeable") |> + dplyr::mutate(OlinkID = paste0(.data[["OlinkID"]], + "_", + .data[["OlinkID_E3072"]])) + + expect_equal( + object = norm_br_data_format |> + dplyr::filter(.data[["OlinkID"]] %in% not_bridgeable_assays$OlinkID) |> + nrow(), + expected = 0L + ) + + + ## check that NPX is being replaced correctly + npx_bridging_recs <- norm_br_data |> + dplyr::mutate(OlinkID = paste0( + .data[["OlinkID"]], + "_", + .data[["OlinkID_E3072"]])) |> + dplyr::select(c("SampleID", + "OlinkID", + "Block", + "BridgingRecommendation", + "MedianCenteredNPX", + "QSNormalizedNPX")) + + npx_assignment_check <- norm_br_data_format |> + dplyr::filter(.data[["SampleType"]] == "SAMPLE") |> # Remove sample controls + dplyr::left_join(npx_bridging_recs, + by = c("SampleID", + "OlinkID", + "Block")) |> + dplyr::filter(.data[["Project"]] == "P1") |> + dplyr::mutate(replace_flag = case_when( + .data[["BridgingRecommendation"]] == "MedianCentering" & + .data[["NPX"]] == .data[["MedianCenteredNPX"]] ~ "Correct", + .data[["BridgingRecommendation"]] == "QuantileSmoothing" & + .data[["NPX"]] == .data[["QSNormalizedNPX"]] ~ "Correct", + TRUE ~ "Incorrect" + )) + + expect_equal( + object = npx_assignment_check |> + dplyr::filter(.data[["replace_flag"]] == "Incorrect") |> + nrow(), + expected = 0L + ) + + } +) + From a4ec970d8ffae743446a590b409bb31dc37fd52d Mon Sep 17 00:00:00 2001 From: dtopouza Date: Wed, 13 Nov 2024 16:19:43 -0500 Subject: [PATCH 02/11] Remove control samples and concatenate SampleID and Project when 
olink_normalization_product_format is run. Add argument to olink_normalization to toggle whether formatting is applied. --- OlinkAnalyze/R/olink_normalization.R | 13 +++++++++++-- OlinkAnalyze/R/olink_normalization_product.R | 13 ++++++++++--- OlinkAnalyze/man/olink_normalization.Rd | 10 ++++++++-- .../man/olink_normalization_product_format.Rd | 9 ++++++--- .../testthat/test-olink_normalization_product.R | 4 +++- 5 files changed, 38 insertions(+), 11 deletions(-) diff --git a/OlinkAnalyze/R/olink_normalization.R b/OlinkAnalyze/R/olink_normalization.R index 35ca2dcd..d5bde89a 100644 --- a/OlinkAnalyze/R/olink_normalization.R +++ b/OlinkAnalyze/R/olink_normalization.R @@ -82,6 +82,9 @@ #' subset normalization. #' @param reference_medians Dataset with columns "OlinkID" and "Reference_NPX". #' Required for reference median normalization. +#' @param format Boolean that controls whether the normalized dataset will be +#' formatted for input to downstream analysis. Only applicable for cross-product +#' bridge normalization. #' #' @return Tibble or ArrowObject with the normalized dataset. 
#' @@ -222,7 +225,8 @@ #' overlapping_samples_df1 = overlap_samples_product, #' df1_project_nr = "proj_ht", #' df2_project_nr = "proj_3k", -#' reference_project = "proj_ht" +#' reference_project = "proj_ht", +#' format = FALSE #' ) #' } #' @@ -233,7 +237,8 @@ olink_normalization <- function(df1, df1_project_nr = "P1", df2_project_nr = "P2", reference_project = "P1", - reference_medians = NULL) { + reference_medians = NULL, + format = FALSE) { # check input ---- lst_check <- olink_norm_input_check( @@ -306,6 +311,10 @@ olink_normalization <- function(df1, not_ref_cols = lst_check$not_ref_cols ) + if (format == TRUE) { + df_norm <- olink_normalization_product_format(df_norm) + } + } else if (lst_check$norm_mode == olink_norm_modes$subset) { # subset normalization ---- diff --git a/OlinkAnalyze/R/olink_normalization_product.R b/OlinkAnalyze/R/olink_normalization_product.R index 40f6da61..075b92d7 100644 --- a/OlinkAnalyze/R/olink_normalization_product.R +++ b/OlinkAnalyze/R/olink_normalization_product.R @@ -653,8 +653,10 @@ olink_normalization_qs <- function(lst_df, #' Removes non-bridgeable assays. Replaces the NPX values of the non-reference #' project by the Median Centered or QS Normalized NPX, according to the #' Bridging Recommendation. Replaces OlinkID by the concatenation of the -#' Explore HT and Explore 3072 OlinkIDs. Removes columns BridgingRecommendation, -#' MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072. +#' Explore HT and Explore 3072 OlinkIDs to record the OlinkIDs from both +#' projects. Replaces SampleID with the concatenation of SampleID and Project +#' to make unique sample IDs for downstream analysis. Removes columns: +#' BridgingRecommendation, MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072. #' #' @param df A "tibble" of Olink data in long format resulting from the #' olink_normalization_product function. 
@@ -682,7 +684,8 @@ olink_normalization_qs <- function(lst_df, #' reference_project = "Explore HT") #' #' # Format output -#' npx_br_data_format <- OlinkAnalyze:::olink_normalization_product_format(npx_br_data) +#' npx_br_data_format <- OlinkAnalyze:::olink_normalization_product_format( +#' npx_br_data) #' #' } @@ -690,6 +693,10 @@ olink_normalization_product_format <- function(df) { ### Keep the data following BridgingRecommendation df_format <- df |> + dplyr::filter(.data[["SampleType"]] == "SAMPLE") |> # Remove controls + dplyr::mutate(SampleID = paste0(.data[["SampleID"]], + "_", + .data[["Project"]])) |> dplyr::filter(.data[["BridgingRecommendation"]] != "NotBridgeable") |> dplyr::mutate(NPX = case_when( .data[["BridgingRecommendation"]] == "MedianCentering" ~ diff --git a/OlinkAnalyze/man/olink_normalization.Rd b/OlinkAnalyze/man/olink_normalization.Rd index 50172402..aaabd558 100644 --- a/OlinkAnalyze/man/olink_normalization.Rd +++ b/OlinkAnalyze/man/olink_normalization.Rd @@ -12,7 +12,8 @@ olink_normalization( df1_project_nr = "P1", df2_project_nr = "P2", reference_project = "P1", - reference_medians = NULL + reference_medians = NULL, + format = FALSE ) } \arguments{ @@ -39,6 +40,10 @@ subset normalization.} \item{reference_medians}{Dataset with columns "OlinkID" and "Reference_NPX". Required for reference median normalization.} + +\item{format}{Boolean that controls whether the normalized dataset will be +formatted for input to downstream analysis. Only applicable for cross-product +bridge normalization.} } \value{ Tibble or ArrowObject with the normalized dataset. 
@@ -250,7 +255,8 @@ olink_normalization( overlapping_samples_df1 = overlap_samples_product, df1_project_nr = "proj_ht", df2_project_nr = "proj_3k", - reference_project = "proj_ht" + reference_project = "proj_ht", + format = FALSE ) } diff --git a/OlinkAnalyze/man/olink_normalization_product_format.Rd b/OlinkAnalyze/man/olink_normalization_product_format.Rd index bd15a9ca..caaa4223 100644 --- a/OlinkAnalyze/man/olink_normalization_product_format.Rd +++ b/OlinkAnalyze/man/olink_normalization_product_format.Rd @@ -20,8 +20,10 @@ modifications. Removes non-bridgeable assays. Replaces the NPX values of the non-reference project by the Median Centered or QS Normalized NPX, according to the Bridging Recommendation. Replaces OlinkID by the concatenation of the -Explore HT and Explore 3072 OlinkIDs. Removes columns BridgingRecommendation, -MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072. +Explore HT and Explore 3072 OlinkIDs to record the OlinkIDs from both +projects. Replaces SampleID with the concatenation of SampleID and Project +to make unique sample IDs for downstream analysis. Removes columns: +BridgingRecommendation, MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072. 
} \examples{ \donttest{ @@ -42,7 +44,8 @@ df2_project_nr = "Explore 3072", reference_project = "Explore HT") # Format output -npx_br_data_format <- OlinkAnalyze:::olink_normalization_product_format(npx_br_data) +npx_br_data_format <- OlinkAnalyze:::olink_normalization_product_format( +npx_br_data) } } diff --git a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R index eb7c6e76..7709ee36 100644 --- a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R +++ b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R @@ -502,6 +502,9 @@ test_that( ## check that NPX is being replaced correctly npx_bridging_recs <- norm_br_data |> + dplyr:::mutate(SampleID = paste0(.data[["SampleID"]], + "_", + .data[["Project"]])) |> dplyr::mutate(OlinkID = paste0( .data[["OlinkID"]], "_", @@ -514,7 +517,6 @@ test_that( "QSNormalizedNPX")) npx_assignment_check <- norm_br_data_format |> - dplyr::filter(.data[["SampleType"]] == "SAMPLE") |> # Remove sample controls dplyr::left_join(npx_bridging_recs, by = c("SampleID", "OlinkID", From 418fa831289f8ffbda1e36248e208309a658b6d1 Mon Sep 17 00:00:00 2001 From: dtopouza Date: Wed, 13 Nov 2024 16:27:28 -0500 Subject: [PATCH 03/11] Edit function description --- OlinkAnalyze/R/olink_normalization_product.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/OlinkAnalyze/R/olink_normalization_product.R b/OlinkAnalyze/R/olink_normalization_product.R index 075b92d7..5453b7df 100644 --- a/OlinkAnalyze/R/olink_normalization_product.R +++ b/OlinkAnalyze/R/olink_normalization_product.R @@ -655,8 +655,9 @@ olink_normalization_qs <- function(lst_df, #' Bridging Recommendation. Replaces OlinkID by the concatenation of the #' Explore HT and Explore 3072 OlinkIDs to record the OlinkIDs from both #' projects. Replaces SampleID with the concatenation of SampleID and Project -#' to make unique sample IDs for downstream analysis. 
Removes columns: -#' BridgingRecommendation, MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072. +#' to make unique sample IDs for downstream analysis. Removes internal and +#' external controls. Removes BridgingRecommendation, MedianCenteredNPX, +#'QSNormalizedNPX, OlinkID_E3072 columns. #' #' @param df A "tibble" of Olink data in long format resulting from the #' olink_normalization_product function. From b3df23ba4f85bf2585b63af9f07393d32e9c3e1f Mon Sep 17 00:00:00 2001 From: dtopouza Date: Wed, 13 Nov 2024 21:37:45 +0000 Subject: [PATCH 04/11] Document --- OlinkAnalyze/man/olink_normalization_product_format.Rd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/OlinkAnalyze/man/olink_normalization_product_format.Rd b/OlinkAnalyze/man/olink_normalization_product_format.Rd index caaa4223..6f1d2645 100644 --- a/OlinkAnalyze/man/olink_normalization_product_format.Rd +++ b/OlinkAnalyze/man/olink_normalization_product_format.Rd @@ -22,8 +22,9 @@ project by the Median Centered or QS Normalized NPX, according to the Bridging Recommendation. Replaces OlinkID by the concatenation of the Explore HT and Explore 3072 OlinkIDs to record the OlinkIDs from both projects. Replaces SampleID with the concatenation of SampleID and Project -to make unique sample IDs for downstream analysis. Removes columns: -BridgingRecommendation, MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072. +to make unique sample IDs for downstream analysis. Removes internal and +external controls. Removes BridgingRecommendation, MedianCenteredNPX, +QSNormalizedNPX, OlinkID_E3072 columns. 
} \examples{ \donttest{ From fd5b993f9c89923494a1d1c492654f599283528e Mon Sep 17 00:00:00 2001 From: dtopouza Date: Wed, 13 Nov 2024 16:47:36 -0500 Subject: [PATCH 05/11] Fix function description --- OlinkAnalyze/man/olink_normalization_product_format.Rd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/OlinkAnalyze/man/olink_normalization_product_format.Rd b/OlinkAnalyze/man/olink_normalization_product_format.Rd index caaa4223..6f1d2645 100644 --- a/OlinkAnalyze/man/olink_normalization_product_format.Rd +++ b/OlinkAnalyze/man/olink_normalization_product_format.Rd @@ -22,8 +22,9 @@ project by the Median Centered or QS Normalized NPX, according to the Bridging Recommendation. Replaces OlinkID by the concatenation of the Explore HT and Explore 3072 OlinkIDs to record the OlinkIDs from both projects. Replaces SampleID with the concatenation of SampleID and Project -to make unique sample IDs for downstream analysis. Removes columns: -BridgingRecommendation, MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072. +to make unique sample IDs for downstream analysis. Removes internal and +external controls. Removes BridgingRecommendation, MedianCenteredNPX, +QSNormalizedNPX, OlinkID_E3072 columns. } \examples{ \donttest{ From f0335c6fe47498ee83e1b3ac9a1955ab7979a633 Mon Sep 17 00:00:00 2001 From: dtopouza Date: Wed, 18 Dec 2024 09:58:06 -0500 Subject: [PATCH 06/11] Update olink_normalization_product_format to include NotBridgeable and NotOverlapping assays with their original OlinkIDs and NPX values. 
--- OlinkAnalyze/R/olink_normalization.R | 6 +- OlinkAnalyze/R/olink_normalization_product.R | 99 ++++++++++--- .../man/olink_normalization_product_format.Rd | 49 +++++-- .../test-olink_normalization_product.R | 136 +++++++++++++----- 4 files changed, 226 insertions(+), 64 deletions(-) diff --git a/OlinkAnalyze/R/olink_normalization.R b/OlinkAnalyze/R/olink_normalization.R index d5bde89a..9dfd3db5 100644 --- a/OlinkAnalyze/R/olink_normalization.R +++ b/OlinkAnalyze/R/olink_normalization.R @@ -312,7 +312,11 @@ olink_normalization <- function(df1, ) if (format == TRUE) { - df_norm <- olink_normalization_product_format(df_norm) + df_norm <- olink_normalization_product_format(bridged_df = df_norm, + df1 = df1, + df1_project_nr = df1_project_nr, + df2 = df2, + df2_project_nr = df2_project_nr) } } else if (lst_check$norm_mode == olink_norm_modes$subset) { diff --git a/OlinkAnalyze/R/olink_normalization_product.R b/OlinkAnalyze/R/olink_normalization_product.R index 5453b7df..dc77d52c 100644 --- a/OlinkAnalyze/R/olink_normalization_product.R +++ b/OlinkAnalyze/R/olink_normalization_product.R @@ -650,20 +650,31 @@ olink_normalization_qs <- function(lst_df, #' Danai G. Topouza #' #' @description -#' Removes non-bridgeable assays. Replaces the NPX values of the non-reference -#' project by the Median Centered or QS Normalized NPX, according to the -#' Bridging Recommendation. Replaces OlinkID by the concatenation of the -#' Explore HT and Explore 3072 OlinkIDs to record the OlinkIDs from both -#' projects. Replaces SampleID with the concatenation of SampleID and Project -#' to make unique sample IDs for downstream analysis. Removes internal and -#' external controls. Removes BridgingRecommendation, MedianCenteredNPX, -#'QSNormalizedNPX, OlinkID_E3072 columns. 
-#' -#' @param df A "tibble" of Olink data in long format resulting from the +#' Replaces the NPX values of the non-reference project by the Median Centered +#' or QS Normalized NPX, according to the Bridging Recommendation. Edits the +#' BridgingRecommendation column to indicate whether an assay is NotBridgeable, +#' NotOverlapping, MedianCentering, or QuantileSmoothing bridged. Replaces +#' OlinkID by the concatenation of the Explore HT and Explore 3072 OlinkIDs to +#' record the OlinkIDs from both projects for bridgeable assays. Assays that are +#' NotBridgeable or NotOverlapping retain their original non-reference OlinkIDs +#' and NPX values. Replaces SampleID with the concatenation of SampleID and +#' Project to make unique sample IDs for downstream analysis. Removes internal +#' and external controls. Removes MedianCenteredNPX, QSNormalizedNPX, +#' OlinkID_E3072 columns. +#' +#' @param bridged_df A "tibble" of Olink data in long format resulting from the #' olink_normalization_product function. +#' @param df1 First dataset to be used for normalization, pre-normalization. +#' Must match df1 used in olink_normalization product bridging. +#' @param df2 Second dataset to be used for normalization, pre-normalization. +#' Must match df2 used in olink_normalization product bridging. +#' @param df1_project_nr Project name of first dataset. Must match name used in +#' olink_normalization product bridging. +#' @param df2_project_nr Project name of second dataset. Must match name used in +#' olink_normalization product bridging. #' #' @return A "tibble" of Olink data in long format containing both input -#' datasets with the quantile normalized quantifications, with the above +#' datasets with the bridged NPX quantifications, with the above #' modifications. 
#' #' @examples @@ -686,20 +697,66 @@ olink_normalization_qs <- function(lst_df, #' #' # Format output #' npx_br_data_format <- OlinkAnalyze:::olink_normalization_product_format( -#' npx_br_data) +#' bridged_df = npx_br_data, +#' df1 = OlinkAnalyze:::data_ht_small, +#' df2 = OlinkAnalyze:::data_3k_small, +#' df1_project_nr = "Explore HT", +#' df2_project_nr = "Explore 3072") #' #' } -olink_normalization_product_format <- function(df) { +olink_normalization_product_format <- function(bridged_df, + df1, + df1_project_nr, + df2, + df2_project_nr) { + + # Extract data from NotBridgeable assays + df_not_bridgeable <- bridged_df |> + dplyr::mutate(SampleID = paste0(.data[["SampleID"]], + "_", + .data[["Project"]])) |> + dplyr::filter(.data[["SampleType"]] == "SAMPLE") |> # Remove controls + dplyr::filter(.data[["AssayType"]] == "assay") |> + dplyr::filter(.data[["BridgingRecommendation"]] == "NotBridgeable") |> + dplyr::mutate(OlinkID = case_when( + Panel == "Explore_HT" ~ OlinkID, + Panel != "Explore_HT" ~ OlinkID_E3072 + )) |> + dplyr::select(!c(.data[["MedianCenteredNPX"]], + .data[["QSNormalizedNPX"]], + .data[["OlinkID_E3072"]]))# Remove extra columns + + + # Extract data from non-overlapping assays + df1_no_overlap <- df1 |> + dplyr::filter(.data[["SampleType"]] == "SAMPLE") |> # Remove controls + dplyr::filter(!(.data[["OlinkID"]] %in% + unlist(eHT_e3072_mapping |> + dplyr::select(starts_with("OlinkID_"))))) |> + dplyr::mutate(Project = df1_project_nr) |> + dplyr::mutate(SampleID = + paste0(.data[["SampleID"]],"_",df1_project_nr)) |> + dplyr::mutate(BridgingRecommendation = "NotOverlapping") + + df2_no_overlap <- df2 |> + dplyr::filter(.data[["SampleType"]] == "SAMPLE") |> # Remove controls + dplyr::filter(!(.data[["OlinkID"]] %in% + unlist(eHT_e3072_mapping |> + dplyr::select(starts_with("OlinkID_"))))) |> + dplyr::mutate(Project = df2_project_nr) |> + dplyr::mutate(SampleID = + paste0(.data[["SampleID"]],"_",df2_project_nr)) |> + 
dplyr::mutate(BridgingRecommendation = "NotOverlapping") ### Keep the data following BridgingRecommendation - df_format <- df |> + df_format <- bridged_df |> dplyr::filter(.data[["SampleType"]] == "SAMPLE") |> # Remove controls dplyr::mutate(SampleID = paste0(.data[["SampleID"]], "_", .data[["Project"]])) |> - dplyr::filter(.data[["BridgingRecommendation"]] != "NotBridgeable") |> - dplyr::mutate(NPX = case_when( + dplyr::filter(!.data[["BridgingRecommendation"]] == "NotBridgeable") |> + dplyr::mutate(NPX = case_when( .data[["BridgingRecommendation"]] == "MedianCentering" ~ .data[["MedianCenteredNPX"]], .data[["BridgingRecommendation"]] == "QuantileSmoothing" ~ @@ -709,11 +766,15 @@ olink_normalization_product_format <- function(df) { dplyr::mutate(OlinkID = paste0(.data[["OlinkID"]], "_", .data[["OlinkID_E3072"]])) |> - dplyr::select(!c(.data[["BridgingRecommendation"]], - .data[["MedianCenteredNPX"]], + dplyr::select(!c(.data[["MedianCenteredNPX"]], .data[["QSNormalizedNPX"]], .data[["OlinkID_E3072"]]))# Remove extra columns - return(df_format) + df_full <- rbind(df_format, + df_not_bridgeable, + df1_no_overlap, + df2_no_overlap) + + return(df_full) } diff --git a/OlinkAnalyze/man/olink_normalization_product_format.Rd b/OlinkAnalyze/man/olink_normalization_product_format.Rd index 6f1d2645..b9080e42 100644 --- a/OlinkAnalyze/man/olink_normalization_product_format.Rd +++ b/OlinkAnalyze/man/olink_normalization_product_format.Rd @@ -5,26 +5,47 @@ \title{Formatting the output of olink_normalization_product for seamless use with downstream OA functions.} \usage{ -olink_normalization_product_format(df) +olink_normalization_product_format( + bridged_df, + df1, + df1_project_nr, + df2, + df2_project_nr +) } \arguments{ -\item{df}{A "tibble" of Olink data in long format resulting from the +\item{bridged_df}{A "tibble" of Olink data in long format resulting from the olink_normalization_product function.} + +\item{df1}{First dataset to be used for normalization, 
pre-normalization. +Must match df1 used in olink_normalization product bridging.} + +\item{df1_project_nr}{Project name of first dataset. Must match name used in +olink_normalization product bridging.} + +\item{df2}{Second dataset to be used for normalization, pre-normalization. +Must match df2 used in olink_normalization product bridging.} + +\item{df2_project_nr}{Project name of second dataset. Must match name used in +olink_normalization product bridging.} } \value{ A "tibble" of Olink data in long format containing both input -datasets with the quantile normalized quantifications, with the above +datasets with the bridged NPX quantifications, with the above modifications. } \description{ -Removes non-bridgeable assays. Replaces the NPX values of the non-reference -project by the Median Centered or QS Normalized NPX, according to the -Bridging Recommendation. Replaces OlinkID by the concatenation of the -Explore HT and Explore 3072 OlinkIDs to record the OlinkIDs from both -projects. Replaces SampleID with the concatenation of SampleID and Project -to make unique sample IDs for downstream analysis. Removes internal and -external controls. Removes BridgingRecommendation, MedianCenteredNPX, -QSNormalizedNPX, OlinkID_E3072 columns. +Replaces the NPX values of the non-reference project by the Median Centered +or QS Normalized NPX, according to the Bridging Recommendation. Edits the +BridgingRecommendation column to indicate whether an assay is NotBridgeable, +NotOverlapping, MedianCentering, or QuantileSmoothing bridged. Replaces +OlinkID by the concatenation of the Explore HT and Explore 3072 OlinkIDs to +record the OlinkIDs from both projects for bridgeable assays. Assays that are +NotBridgeable or NotOverlapping retain their original non-reference OlinkIDs +and NPX values. Replaces SampleID with the concatenation of SampleID and +Project to make unique sample IDs for downstream analysis. Removes internal +and external controls. 
Removes MedianCenteredNPX, QSNormalizedNPX, +OlinkID_E3072 columns. } \examples{ \donttest{ @@ -46,7 +67,11 @@ reference_project = "Explore HT") # Format output npx_br_data_format <- OlinkAnalyze:::olink_normalization_product_format( -npx_br_data) +bridged_df = npx_br_data, +df1 = OlinkAnalyze:::data_ht_small, +df2 = OlinkAnalyze:::data_3k_small, +df1_project_nr = "Explore HT", +df2_project_nr = "Explore 3072") } } diff --git a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R index d83274c2..8ddf55fe 100644 --- a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R +++ b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R @@ -461,52 +461,81 @@ test_that( test_that( "olink_normalization_product_format - works", { - data_3k <- get_example_data(filename = "example_3k_data.rds") - data_ht <- get_example_data(filename = "example_HT_data.rds") + data_3k <- get_example_data(filename = "../data/example_3k_data.rds") + data_ht <- get_example_data(filename = "../data/example_HT_data.rds") expect_message(expect_warning( - object = norm_br_data <- olink_normalization( - df1 = data_3k, - df2 = data_ht, - overlapping_samples_df1 = intersect( - x = unique(data_3k$SampleID), - y = unique(data_ht$SampleID) - ) |> - (\(x) x[!grepl("CONTROL", x)])(), - overlapping_samples_df2 = NULL, - df1_project_nr = "P1", - df2_project_nr = "P2", - reference_project = "P2", - reference_medians = NULL - ), + object = norm_br_data_format <- olink_normalization( + df1 = data_3k, + df2 = data_ht, + overlapping_samples_df1 = intersect( + x = unique(data_3k$SampleID), + y = unique(data_ht$SampleID) + ) |> + (\(x) x[!grepl("CONTROL", x)])(), + overlapping_samples_df2 = NULL, + df1_project_nr = "P1", + df2_project_nr = "P2", + reference_project = "P2", + reference_medians = NULL, + format = TRUE # format data + ), regexp = "2 assays are not shared across products."), regexp = "Cross-product normalization 
will be performed!" ) - # Format output - norm_br_data_format <- olink_normalization_product_format(norm_br_data) + # unformatted data + expect_message(expect_warning( + object = norm_br_data <- olink_normalization( + df1 = data_3k, + df2 = data_ht, + overlapping_samples_df1 = intersect( + x = unique(data_3k$SampleID), + y = unique(data_ht$SampleID) + ) |> + (\(x) x[!grepl("CONTROL", x)])(), + overlapping_samples_df2 = NULL, + df1_project_nr = "P1", + df2_project_nr = "P2", + reference_project = "P2", + reference_medians = NULL # format data + ), + regexp = "2 assays are not shared across products."), + regexp = "Cross-product normalization will be performed!" + ) + + expect_warning( + object = norm_br_data_format_function <- + olink_normalization_product_format(bridged_df = norm_br_data, + df1 = data_ht, + df1_project_nr = "P2", + df2 = data_3k, + df2_project_nr = "P1"), + regexp = NA) + + ## check that function works both ways + expect_equal( + object = dim(norm_br_data_format), + expected = dim(norm_br_data_format_function) + ) ## check that correct columns are removed expect_equal( object = length(intersect(colnames(norm_br_data_format), - c("BridgingRecommendation", "MedianCenteredNPX", + c("MedianCenteredNPX", "QSNormalizedNPX", "OlinkID_E3072"))), expected = 0L ) - ## check that NotBridgeable assays are removed - not_bridgeable_assays <- norm_br_data |> - dplyr::filter(.data[["BridgingRecommendation"]] == "NotBridgeable") |> - dplyr::mutate(OlinkID = paste0(.data[["OlinkID"]], - "_", - .data[["OlinkID_E3072"]])) - + ## check that NotBridgeable assays get their own OlinkIDs expect_equal( object = norm_br_data_format |> - dplyr::filter(.data[["OlinkID"]] %in% not_bridgeable_assays$OlinkID) |> - nrow(), - expected = 0L + dplyr::filter(.data[["BridgingRecommendation"]] == "NotBridgeable") |> + dplyr::select(all_of("OlinkID")) |> + dplyr::distinct() |> + dplyr::pull(), + expected = c("OID41012", "OID20054") ) @@ -527,15 +556,17 @@ test_that( "QSNormalizedNPX")) 
npx_assignment_check <- norm_br_data_format |> - dplyr::left_join(npx_bridging_recs, + dplyr::filter(!.data[["BridgingRecommendation"]] %in% c("NotBridgeable","NotOverlapping")) |> + dplyr::left_join(npx_bridging_recs |> + rename(BridgingRecommendationOriginal = all_of("BridgingRecommendation")), by = c("SampleID", "OlinkID", "Block")) |> dplyr::filter(.data[["Project"]] == "P1") |> dplyr::mutate(replace_flag = case_when( - .data[["BridgingRecommendation"]] == "MedianCentering" & + .data[["BridgingRecommendationOriginal"]] == "MedianCentering" & .data[["NPX"]] == .data[["MedianCenteredNPX"]] ~ "Correct", - .data[["BridgingRecommendation"]] == "QuantileSmoothing" & + .data[["BridgingRecommendationOriginal"]] == "QuantileSmoothing" & .data[["NPX"]] == .data[["QSNormalizedNPX"]] ~ "Correct", TRUE ~ "Incorrect" )) @@ -547,6 +578,47 @@ test_that( expected = 0L ) + ## check that the numbers of assay assignments are correct + expect_equal( + object = norm_br_data_format |> + select(OlinkID, BridgingRecommendation) |> + distinct() |> + filter(BridgingRecommendation == "NotOverlapping") |> + tally() |> + pull(), + expected = 2L + ) + + expect_equal( + object = norm_br_data_format |> + select(OlinkID, BridgingRecommendation) |> + distinct() |> + filter(BridgingRecommendation == "NotBridgeable") |> + tally() |> + pull(), + expected = 2L + ) + + expect_equal( + object = norm_br_data_format |> + select(OlinkID, BridgingRecommendation) |> + distinct() |> + filter(BridgingRecommendation == "MedianCentering") |> + tally() |> + pull(), + expected = 40L + ) + + expect_equal( + object = norm_br_data_format |> + select(OlinkID, BridgingRecommendation) |> + distinct() |> + filter(BridgingRecommendation == "QuantileSmoothing") |> + tally() |> + pull(), + expected = 63L + ) + } ) From 05e954b0c7b9e5fc85b2db67b444e101ee52e203 Mon Sep 17 00:00:00 2001 From: dtopouza Date: Thu, 23 Jan 2025 10:00:20 -0500 Subject: [PATCH 07/11] Update get_example_data function and calls with new file 
location --- .../tests/testthat/helper-get_example_olink_data.R | 2 +- .../testthat/test-olink_normalization_product.R | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/OlinkAnalyze/tests/testthat/helper-get_example_olink_data.R b/OlinkAnalyze/tests/testthat/helper-get_example_olink_data.R index d227d09f..777b4723 100644 --- a/OlinkAnalyze/tests/testthat/helper-get_example_olink_data.R +++ b/OlinkAnalyze/tests/testthat/helper-get_example_olink_data.R @@ -1,6 +1,6 @@ # load example datasets for Olink Explore products get_example_data <- function(filename) { - ref_norm_res_file <- test_path("data", filename) + ref_norm_res_file <- test_path("testthat","data", filename) #check that file exists expect_true(file.exists(ref_norm_res_file)) # read rds data diff --git a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R index 0d9de3a4..e3886b00 100644 --- a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R +++ b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R @@ -3,10 +3,10 @@ test_that( "olink_normalization_is_bridgeable - works", { - + skip_if_not(file.exists(test_path("data","example_3k_data.rds"))) skip_if_not(file.exists(test_path("data","example_HT_data.rds"))) - + data_3k <- get_example_data(filename = "example_3k_data.rds") data_ht <- get_example_data(filename = "example_HT_data.rds") @@ -118,7 +118,7 @@ test_that( test_that( "olink_normalization_qs - works - compare to reference", { - + skip_if_not(file.exists(test_path("data","example_3k_data.rds"))) skip_if_not(file.exists(test_path("data","example_HT_data.rds"))) @@ -214,7 +214,7 @@ test_that( test_that( "olink_normalization_qs - works - expected output, all bridge samples", { - + skip_if_not(file.exists(test_path("data","example_3k_data.rds"))) skip_if_not(file.exists(test_path("data","example_HT_data.rds"))) @@ -338,7 +338,7 @@ test_that( test_that( "olink_normalization_qs - 
works - expected output, 50 bridge samples", { - + skip_if_not(file.exists(test_path("data","example_3k_data.rds"))) skip_if_not(file.exists(test_path("data","example_HT_data.rds"))) @@ -466,8 +466,8 @@ test_that( test_that( "olink_normalization_product_format - works", { - data_3k <- get_example_data(filename = "../data/example_3k_data.rds") - data_ht <- get_example_data(filename = "../data/example_HT_data.rds") + data_3k <- get_example_data(filename = "example_3k_data.rds") + data_ht <- get_example_data(filename = "example_HT_data.rds") expect_message(expect_warning( object = norm_br_data_format <- olink_normalization( From c6ffb9bebfb23b932b355fef50fc5c98b7f0a3ea Mon Sep 17 00:00:00 2001 From: Kathy Nevola <72228735+kathy-nevola@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:02:51 -0500 Subject: [PATCH 08/11] fix test example data interaction --- OlinkAnalyze/tests/testthat/helper-get_example_olink_data.R | 5 +++-- .../tests/testthat/test-olink_normalization_product.R | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/OlinkAnalyze/tests/testthat/helper-get_example_olink_data.R b/OlinkAnalyze/tests/testthat/helper-get_example_olink_data.R index 777b4723..bc50a28b 100644 --- a/OlinkAnalyze/tests/testthat/helper-get_example_olink_data.R +++ b/OlinkAnalyze/tests/testthat/helper-get_example_olink_data.R @@ -1,8 +1,9 @@ # load example datasets for Olink Explore products get_example_data <- function(filename) { - ref_norm_res_file <- test_path("testthat","data", filename) + ref_norm_res_file <- test_path("data", filename) + print(ref_norm_res_file) #check that file exists - expect_true(file.exists(ref_norm_res_file)) + expect_true(file.exists(ref_norm_res_file),label = ref_norm_res_file) # read rds data readRDS(file = ref_norm_res_file) } diff --git a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R index e3886b00..358dd3c2 100644 --- 
a/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R +++ b/OlinkAnalyze/tests/testthat/test-olink_normalization_product.R @@ -466,6 +466,9 @@ test_that( test_that( "olink_normalization_product_format - works", { + skip_if_not(file.exists(test_path("data","example_3k_data.rds"))) + skip_if_not(file.exists(test_path("data","example_HT_data.rds"))) + data_3k <- get_example_data(filename = "example_3k_data.rds") data_ht <- get_example_data(filename = "example_HT_data.rds") From 08990582560443fad1395d2a2679910532da0034 Mon Sep 17 00:00:00 2001 From: Kathy Nevola <72228735+kathy-nevola@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:03:35 -0500 Subject: [PATCH 09/11] added missing selective call for dplyr starts_with --- OlinkAnalyze/R/olink_normalization_product.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OlinkAnalyze/R/olink_normalization_product.R b/OlinkAnalyze/R/olink_normalization_product.R index 39d05003..eaff66ae 100644 --- a/OlinkAnalyze/R/olink_normalization_product.R +++ b/OlinkAnalyze/R/olink_normalization_product.R @@ -733,7 +733,7 @@ olink_normalization_product_format <- function(bridged_df, dplyr::filter(.data[["SampleType"]] == "SAMPLE") |> # Remove controls dplyr::filter(!(.data[["OlinkID"]] %in% unlist(eHT_e3072_mapping |> - dplyr::select(starts_with("OlinkID_"))))) |> + dplyr::select(dplyr::starts_with("OlinkID_"))))) |> dplyr::mutate(Project = df1_project_nr) |> dplyr::mutate(SampleID = paste0(.data[["SampleID"]],"_",df1_project_nr)) |> From a2f78b09a562a6ebb28eb7bec26a2d086311cc95 Mon Sep 17 00:00:00 2001 From: dtopouza Date: Fri, 24 Jan 2025 10:29:11 -0500 Subject: [PATCH 10/11] Fix dplyr package call and confirm devtools::checks() pass --- OlinkAnalyze/R/olink_normalization_product.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OlinkAnalyze/R/olink_normalization_product.R b/OlinkAnalyze/R/olink_normalization_product.R index eaff66ae..298bb1b2 100644 ---
a/OlinkAnalyze/R/olink_normalization_product.R +++ b/OlinkAnalyze/R/olink_normalization_product.R @@ -743,7 +743,7 @@ olink_normalization_product_format <- function(bridged_df, dplyr::filter(.data[["SampleType"]] == "SAMPLE") |> # Remove controls dplyr::filter(!(.data[["OlinkID"]] %in% unlist(eHT_e3072_mapping |> - dplyr::select(starts_with("OlinkID_"))))) |> + dplyr::select(dplyr::starts_with("OlinkID_"))))) |> dplyr::mutate(Project = df2_project_nr) |> dplyr::mutate(SampleID = paste0(.data[["SampleID"]],"_",df2_project_nr)) |> From f162903bc8fb23f0bc3fe13a245b853a78b087a6 Mon Sep 17 00:00:00 2001 From: dtopouza Date: Fri, 24 Jan 2025 13:12:15 -0500 Subject: [PATCH 11/11] Order output by Project and SampleID --- OlinkAnalyze/R/olink_normalization_product.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/OlinkAnalyze/R/olink_normalization_product.R b/OlinkAnalyze/R/olink_normalization_product.R index 298bb1b2..99a89793 100644 --- a/OlinkAnalyze/R/olink_normalization_product.R +++ b/OlinkAnalyze/R/olink_normalization_product.R @@ -775,6 +775,10 @@ olink_normalization_product_format <- function(bridged_df, df1_no_overlap, df2_no_overlap) + # Sort by Project + df_full <- df_full |> + dplyr::arrange(.data[["Project"]], .data[["SampleID"]]) + return(df_full) }