diff --git a/NAMESPACE b/NAMESPACE index 03f1263c..25864189 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,9 @@ S3method(gen_tibble,matrix) S3method(group_by,gen_tbl) S3method(indiv_het_obs,tbl_df) S3method(indiv_het_obs,vctrs_bigSNP) +S3method(indiv_inbreeding,grouped_df) +S3method(indiv_inbreeding,tbl_df) +S3method(indiv_inbreeding,vctrs_bigSNP) S3method(indiv_missingness,tbl_df) S3method(indiv_missingness,vctrs_bigSNP) S3method(indiv_ploidy,tbl_df) @@ -110,6 +113,7 @@ export(gt_save) export(gt_set_imputed) export(gt_uses_imputed) export(indiv_het_obs) +export(indiv_inbreeding) export(indiv_missingness) export(indiv_ploidy) export(loci_alt_freq) diff --git a/R/indiv_inbreeding.R b/R/indiv_inbreeding.R new file mode 100644 index 00000000..86430e85 --- /dev/null +++ b/R/indiv_inbreeding.R @@ -0,0 +1,63 @@ +#' Individual inbreeding coefficient +#' +#' This function calculates the inbreeding coefficient for each individual +#' based on the beat estimate of Weir and Goudet (2017). +#' +#' @param .x a vector of class `vctrs_bigSNP` (usually the `genotype` column of +#' a [`gen_tibble`] object), +#' or a [`gen_tibble`]. +#' @param method currently only "WG17" (for Weir and Goudet 2017). +#' @param allele_sharing_mat optional and only relevant for "WG17", the matrix of Allele Sharing returned by +#' [pairwise_allele_sharing()] with `as_matrix=TRUE`. As a number of statistics can be +#' derived from the Allele Sharing matrix, +#' it it sometimes more efficient to pre-compute this matrix. It is not possible to use this with +#' grouped tibbles. +#' @param ... currently unused. +#' @return a numeric vector of inbreeding coefficients. +#' @export + +indiv_inbreeding <- function(.x, method = c("WG17"), allele_sharing_mat = NULL, ...) { + UseMethod("indiv_inbreeding", .x) +} + +#' @export +#' @rdname indiv_inbreeding +indiv_inbreeding.tbl_df <- function(.x, method = c("WG17"), allele_sharing_mat = NULL, ...){ + stopifnot_gen_tibble(.x) + # extract the column and hand it over to its method + if (is.null(allele_sharing_mat)){ + allele_sharing_mat <- pairwise_allele_sharing(.x, as_matrix = TRUE) + } + indiv_inbreeding(.x$genotypes, method= method, allele_sharing_mat = allele_sharing_mat, ...) +} + +#' @export +#' @rdname indiv_inbreeding +indiv_inbreeding.vctrs_bigSNP <- function(.x, method = c("WG17"), allele_sharing_mat = NULL, ...){ + rlang::check_dots_empty() + stopifnot_diploid(.x) + if (is.null(allele_sharing_mat)){ + # deal with the fact that allele sharing can only be computed for the gen_tibble + # TODO modify allele sharing code to be able to create a matrix just from the genotype column + stop("this method has to be applied to a gen_tibble, not just the genotype column") + } + + # taken from beta.dosage in hierfstat + Mii <- diag(allele_sharing_mat) # diagonal of the matrix + diag(allele_sharing_mat) <- NA + MB <- mean(allele_sharing_mat, na.rm = T) + indiv_inb <- ((Mii * 2 - 1) - MB)/(1 - MB) + return(indiv_inb) +} + +#' @export +#' @rdname loci_alt_freq +indiv_inbreeding.grouped_df <- function(.x, method = c("WG17"), allele_sharing_mat = NULL, ...){ + rlang::check_dots_empty() + stopifnot_diploid(.x) + if (!is.null(allele_sharing_mat)){ + stop("allele_sharing_mat can not be provided for grouped_df objects") + } + + group_map(.x, .f=~indiv_inbreeding.(.x, method = method, allele_sharing_mat=NULL, ...)) +} diff --git a/R/pairwise_allele_sharing.R b/R/pairwise_allele_sharing.R index 84211853..f9819791 100644 --- a/R/pairwise_allele_sharing.R +++ b/R/pairwise_allele_sharing.R @@ -15,6 +15,7 @@ #' @export pairwise_allele_sharing <- function(x, as_matrix=FALSE, block_size = bigstatsr::block_size(count_loci(x))) { + X <- attr(x$genotypes,"bigsnp") # convenient pointer x_ind_col <- .gt_bigsnp_cols(x) x_ind_row <- .gt_bigsnp_rows(x) diff --git a/man/indiv_inbreeding.Rd b/man/indiv_inbreeding.Rd new file mode 100644 index 00000000..ed3d24c1 --- /dev/null +++ b/man/indiv_inbreeding.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/indiv_inbreeding.R +\name{indiv_inbreeding} +\alias{indiv_inbreeding} +\alias{indiv_inbreeding.tbl_df} +\alias{indiv_inbreeding.vctrs_bigSNP} +\title{Individual inbreeding coefficient} +\usage{ +indiv_inbreeding(.x, method = c("WG17"), allele_sharing_mat = NULL, ...) + +\method{indiv_inbreeding}{tbl_df}(.x, method = c("WG17"), allele_sharing_mat = NULL, ...) + +\method{indiv_inbreeding}{vctrs_bigSNP}(.x, method = c("WG17"), allele_sharing_mat = NULL, ...) +} +\arguments{ +\item{.x}{a vector of class \code{vctrs_bigSNP} (usually the \code{genotype} column of +a \code{\link{gen_tibble}} object), +or a \code{\link{gen_tibble}}.} + +\item{method}{currently only "WG17" (for Weir and Goudet 2017).} + +\item{allele_sharing_mat}{optional and only relevant for "WG17", the matrix of Allele Sharing returned by +\code{\link[=pairwise_allele_sharing]{pairwise_allele_sharing()}} with \code{as_matrix=TRUE}. As a number of statistics can be +derived from the Allele Sharing matrix, +it it sometimes more efficient to pre-compute this matrix. It is not possible to use this with +grouped tibbles.} + +\item{...}{currently unused.} +} +\value{ +a numeric vector of inbreeding coefficients. +} +\description{ +This function calculates the inbreeding coefficient for each individual +based on the beat estimate of Weir and Goudet (2017). +} diff --git a/man/loci_alt_freq.Rd b/man/loci_alt_freq.Rd index a69c5fe3..68d934ae 100644 --- a/man/loci_alt_freq.Rd +++ b/man/loci_alt_freq.Rd @@ -1,6 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/loci_alt_freq.R -\name{loci_alt_freq} +% Please edit documentation in R/indiv_inbreeding.R, R/loci_alt_freq.R +\name{indiv_inbreeding.grouped_df} +\alias{indiv_inbreeding.grouped_df} \alias{loci_alt_freq} \alias{loci_alt_freq.tbl_df} \alias{loci_alt_freq.vctrs_bigSNP} @@ -11,6 +12,8 @@ \alias{loci_maf.grouped_df} \title{Estimate allele frequencies at each each locus} \usage{ +\method{indiv_inbreeding}{grouped_df}(.x, method = c("WG17"), allele_sharing_mat = NULL, ...) + loci_alt_freq(.x, ...) \method{loci_alt_freq}{tbl_df}(.x, ...) diff --git a/tests/testthat/test_indiv_inbreeding.R b/tests/testthat/test_indiv_inbreeding.R new file mode 100644 index 00000000..7e3a3925 --- /dev/null +++ b/tests/testthat/test_indiv_inbreeding.R @@ -0,0 +1,31 @@ +test_genotypes <- rbind(c(1,1,0,1,1,0), + c(2,1,0,NA,0,0), + c(2,NA,0,0,1,1), + c(1,0,0,1,0,0), + c(1,2,0,1,2,1), + c(0,0,0,0,NA,1), + c(0,1,1,0,1,NA)) +test_indiv_meta <- data.frame (id=c("a","b","c","d","e","f","g"), + population = c("pop1","pop1","pop2","pop2","pop1","pop3","pop3")) +test_loci <- data.frame(name=paste0("rs",1:6), + chromosome=paste0("chr",c(1,1,1,1,2,2)), + position=as.integer(c(3,5,65,343,23,456)), + genetic_dist = as.double(rep(0,6)), + allele_ref = c("A","T","C","G","C","T"), + allele_alt = c("T","C", NA,"C","G","A")) + +test_gt <- gen_tibble(x = test_genotypes, loci = test_loci, indiv_meta = test_indiv_meta, quiet = TRUE) + + +test_that("indiv_inbreeding compute correctly",{ + # expect error if the tibble is not grouped + inbreed_hier <- diag(hierfstat::beta.dosage(test_genotypes)) + inbreed_gt <- test_gt %>% indiv_inbreeding(method = "WG17") + expect_true(all.equal(inbreed_hier, inbreed_gt, check.attributes = FALSE)) + + # group the tibble + test_gt <- test_gt %>% dplyr::group_by(population) + # is this right??? + inbreed_group_gt <- test_gt %>% indiv_inbreeding(method = "WG17") + +})