Skip to content

Commit

Permalink
inbreeding based on beta WG17
Browse files Browse the repository at this point in the history
  • Loading branch information
dramanica committed Dec 7, 2024
1 parent 3daccdd commit dd142d8
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 2 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ S3method(gen_tibble,matrix)
S3method(group_by,gen_tbl)
S3method(indiv_het_obs,tbl_df)
S3method(indiv_het_obs,vctrs_bigSNP)
S3method(indiv_inbreeding,grouped_df)
S3method(indiv_inbreeding,tbl_df)
S3method(indiv_inbreeding,vctrs_bigSNP)
S3method(indiv_missingness,tbl_df)
S3method(indiv_missingness,vctrs_bigSNP)
S3method(indiv_ploidy,tbl_df)
Expand Down Expand Up @@ -110,6 +113,7 @@ export(gt_save)
export(gt_set_imputed)
export(gt_uses_imputed)
export(indiv_het_obs)
export(indiv_inbreeding)
export(indiv_missingness)
export(indiv_ploidy)
export(loci_alt_freq)
Expand Down
63 changes: 63 additions & 0 deletions R/indiv_inbreeding.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#' Individual inbreeding coefficient
#'
#' This function calculates the inbreeding coefficient for each individual
#' based on the beat estimate of Weir and Goudet (2017).
#'
#' @param .x a vector of class `vctrs_bigSNP` (usually the `genotype` column of
#' a [`gen_tibble`] object),
#' or a [`gen_tibble`].
#' @param method currently only "WG17" (for Weir and Goudet 2017).
#' @param allele_sharing_mat optional and only relevant for "WG17", the matrix of Allele Sharing returned by
#' [pairwise_allele_sharing()] with `as_matrix=TRUE`. As a number of statistics can be
#' derived from the Allele Sharing matrix,
#' it it sometimes more efficient to pre-compute this matrix. It is not possible to use this with
#' grouped tibbles.
#' @param ... currently unused.
#' @return a numeric vector of inbreeding coefficients.
#' @export

indiv_inbreeding <- function(.x, method = c("WG17"), allele_sharing_mat = NULL, ...) {
UseMethod("indiv_inbreeding", .x)
}

#' @export
#' @rdname indiv_inbreeding
indiv_inbreeding.tbl_df <- function(.x, method = c("WG17"), allele_sharing_mat = NULL, ...){
stopifnot_gen_tibble(.x)
# extract the column and hand it over to its method
if (is.null(allele_sharing_mat)){
allele_sharing_mat <- pairwise_allele_sharing(.x, as_matrix = TRUE)
}
indiv_inbreeding(.x$genotypes, method= method, allele_sharing_mat = allele_sharing_mat, ...)
}

#' @export
#' @rdname indiv_inbreeding
indiv_inbreeding.vctrs_bigSNP <- function(.x, method = c("WG17"), allele_sharing_mat = NULL, ...){
rlang::check_dots_empty()
stopifnot_diploid(.x)
if (is.null(allele_sharing_mat)){
# deal with the fact that allele sharing can only be computed for the gen_tibble
# TODO modify allele sharing code to be able to create a matrix just from the genotype column
stop("this method has to be applied to a gen_tibble, not just the genotype column")
}

# taken from beta.dosage in hierfstat
Mii <- diag(allele_sharing_mat) # diagonal of the matrix
diag(allele_sharing_mat) <- NA
MB <- mean(allele_sharing_mat, na.rm = T)
indiv_inb <- ((Mii * 2 - 1) - MB)/(1 - MB)
return(indiv_inb)
}

#' @export
#' @rdname loci_alt_freq
indiv_inbreeding.grouped_df <- function(.x, method = c("WG17"), allele_sharing_mat = NULL, ...){
rlang::check_dots_empty()
stopifnot_diploid(.x)
if (!is.null(allele_sharing_mat)){
stop("allele_sharing_mat can not be provided for grouped_df objects")
}

group_map(.x, .f=~indiv_inbreeding.(.x, method = method, allele_sharing_mat=NULL, ...))
}
1 change: 1 addition & 0 deletions R/pairwise_allele_sharing.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#' @export
pairwise_allele_sharing <- function(x, as_matrix=FALSE,
block_size = bigstatsr::block_size(count_loci(x))) {

X <- attr(x$genotypes,"bigsnp") # convenient pointer
x_ind_col <- .gt_bigsnp_cols(x)
x_ind_row <- .gt_bigsnp_rows(x)
Expand Down
36 changes: 36 additions & 0 deletions man/indiv_inbreeding.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/loci_alt_freq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions tests/testthat/test_indiv_inbreeding.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
test_genotypes <- rbind(c(1,1,0,1,1,0),
c(2,1,0,NA,0,0),
c(2,NA,0,0,1,1),
c(1,0,0,1,0,0),
c(1,2,0,1,2,1),
c(0,0,0,0,NA,1),
c(0,1,1,0,1,NA))
test_indiv_meta <- data.frame (id=c("a","b","c","d","e","f","g"),
population = c("pop1","pop1","pop2","pop2","pop1","pop3","pop3"))
test_loci <- data.frame(name=paste0("rs",1:6),
chromosome=paste0("chr",c(1,1,1,1,2,2)),
position=as.integer(c(3,5,65,343,23,456)),
genetic_dist = as.double(rep(0,6)),
allele_ref = c("A","T","C","G","C","T"),
allele_alt = c("T","C", NA,"C","G","A"))

test_gt <- gen_tibble(x = test_genotypes, loci = test_loci, indiv_meta = test_indiv_meta, quiet = TRUE)


test_that("indiv_inbreeding compute correctly",{
# expect error if the tibble is not grouped
inbreed_hier <- diag(hierfstat::beta.dosage(test_genotypes))
inbreed_gt <- test_gt %>% indiv_inbreeding(method = "WG17")
expect_true(all.equal(inbreed_hier, inbreed_gt, check.attributes = FALSE))

# group the tibble
test_gt <- test_gt %>% dplyr::group_by(population)
# is this right???
inbreed_group_gt <- test_gt %>% indiv_inbreeding(method = "WG17")

})

0 comments on commit dd142d8

Please sign in to comment.