Skip to content

Commit

Permalink
update internal fill_zero and generate_bruvo_mat
Browse files Browse the repository at this point in the history
These will now allow for numeric or character matrix output. This also
changes the argument `mat` to `mat_type` and changes the value from a
boolean to a character vector indicating the type of matrix output (if
none, no matrix output).

This commit will fix #231
  • Loading branch information
zkamvar committed Jan 30, 2021
1 parent 0648cd6 commit 9259a28
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 45 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ GITHUB

* The default branch for the repository is now "main" (@zkamvar, #218)

BUG FIX
-------

* `genind2genalex()` no longer converts diploid sequence data to zeros on export
This fixes #231 (@zkamvar, #233).

poppr 2.8.7
===========

Expand Down
6 changes: 3 additions & 3 deletions R/file_handling.r
Original file line number Diff line number Diff line change
Expand Up @@ -634,17 +634,17 @@ genind2genalex <- function(gid, filename = "", overwrite = FALSE, quiet = FALSE,
the_gid <- as.character(pop(gid))
df <- genind2df(gid, sep = "/", usepop = FALSE)
if (any(ploid > 1)){
df <- generate_bruvo_mat(df, maxploid = max(ploid), sep = "/", mat = TRUE)
df <- generate_bruvo_mat(df, maxploid = max(ploid), sep = "/", mat_type = "character")
}
df[is.na(df)] <- 0

# making sure that the individual names are included.
if(all(indNames(gid) == "") | is.null(indNames(gid))){
indNames(gid) <- paste("ind", 1:nInd(gid), sep="")
indNames(gid) <- paste("ind", seq(nInd(gid)), sep="")
}
df <- cbind(indNames(gid), the_gid, df)
# setting the NA replacement. This doesn't work too well.
replacement <- ifelse(gid@type == "PA", "-1", "0")
replacement <- if(gid@type == "PA") "-1" else "0"
if(!quiet) cat("Writing the table to", filename, "... ")

if(geo == TRUE & !is.null(gid$other[[geodf]])){
Expand Down
98 changes: 57 additions & 41 deletions R/internal.r
Original file line number Diff line number Diff line change
Expand Up @@ -1614,52 +1614,55 @@ make_poppr_plot_title <- function(samp, file = NULL, N = NULL, pop = NULL){
return(plot_title)
}

#==============================================================================#
# fill a single genotype with zeroes if the number of alleles is maxploid.
#
# Public functions utilizing this function:
# # none
#
# Private functions utilizing this function:
# # fill_zero_locus
#==============================================================================#
fill_zero <- function(x, maxploid, mat = FALSE){
#' Pad a single locus genotype with zeroes according the maximum ploidy.
#'
#' @param x a vector of alleles for a single individual at a single locus
#' @param maxploid the maximum ploidy to pad
#' @param mat_type if the final output is to be a matrix with one column per
#' allele, what type of matrix should it be. Acceptable are: numeric and character.
#' @noRd
#' @return a vector of length 1 (default) or of length maxploid.
#' @seealso used by: [fill_zero_locus()]
fill_zero <- function(x, maxploid, mat_type = character(0)){
if (length(x) < maxploid){
if (!mat){
# If the genotype is less than the max ploidy, fill it with a zero
if (length(mat_type)) {
fill <- as(0L, mat_type)
pad <- rep(fill, maxploid - length(x))
res <- c(pad, as(x, mat_type))
} else {
zeroes <- paste(rep(0, maxploid - length(x)), collapse = "/")
res <- paste(x, collapse = "/")
res <- paste(zeroes, res, sep = "/")
} else {
res <- c(rep(0.0, maxploid - length(x)), as.numeric(x))
}

} else {
if (!mat){
res <- paste(x, collapse = "/")
# If the genotype is the right format_type, either collapse it or return it
if (length(mat_type)){
res <- as(x, mat_type)
} else {
res <- as.numeric(x)
res <- paste(x, collapse = "/")
}
}
return(res)
}

#==============================================================================#
# Fill short genotypes in a character vector with zeroes.
#
# Public functions utilizing this function:
# # none
#
# Private functions utilizing this function:
# # generate_bruvo_mat
#==============================================================================#
fill_zero_locus <- function(x, sep = "/", maxploid, mat = FALSE){
#' Fill short genotypes in a character vector with zeroes
#'
#' @param x a character vector of genotypes at a single locus, separated by "/"
#' @param maxploid the maximum ploidy to pad
#' @param mat_type if the final output is to be a matrix with one column per
#' allele, what type of matrix should it be. Acceptable are: numeric and character.
#' @noRd
#' @return a vector of length 1 (default) or of length maxploid.
#' @seealso uses: [fill_zero_locus()], used by: [create_bruvo_mat()]
fill_zero_locus <- function(x, sep = "/", maxploid, mat_type = character(0)){
x <- strsplit(x, sep)
if (mat){
result <- numeric(maxploid)
if (length(mat_type)) {
result <- vector(mode = mat_type, length = maxploid)
} else {
result <- character(1)
}
return(t(vapply(x, fill_zero, result, maxploid, mat)))
return(t(vapply(x, fill_zero, result, maxploid, mat_type)))
}

#==============================================================================#
Expand Down Expand Up @@ -1708,19 +1711,32 @@ fill_zero_locus <- function(x, sep = "/", maxploid, mat = FALSE){
# sample_10 0 0 41 31 0 17 30 57
#
#
# Public functions utilizing this function:
# # none
#
# Private functions utilizing this function:
# # none
#==============================================================================#
generate_bruvo_mat <- function(x, maxploid, sep = "/", mat = FALSE){
if (mat){
result <- matrix(numeric(nrow(x)*maxploid), ncol = maxploid, nrow = nrow(x))

#' Fill short genotypes in a data frame with zeroes
#'
#' @param x a data frame of character vectors representing genotypes with alleles separated by "/"
#' @param maxploid the maximum ploidy to pad
#' @param mat if the final output is to be a matrix with one column per
#' allele, what type of matrix should it be. Acceptable are: numeric and character.
#' Default is an empty character vector, indicating that alleles should be concatenated.
#' @noRd
#' @return a vector of length 1 (default) or of length maxploid.
#' @seealso uses: [fill_zero_locus()], used by: [genind2genalex()]
generate_bruvo_mat <- function(x, maxploid, sep = "/", mat_type = character(0)){
# --- 2021-01-30 ---
# mat has been renamed to mat_type and recast as a character vector. For
# details, see https://github.com/grunwaldlab/poppr/issues/108
# ------------------
# Create a template for vapply to fill in with the result.
if (length(mat_type)) {
# Each locus will be a matrix with one allele per column
fill <- vector(mode = mat_type, length = nrow(x) * maxploid)
result <- matrix(fill, ncol = maxploid, nrow = nrow(x))
} else {
# Each locus will be a character vector with all the alleles
result <- character(nrow(x))
}
res <- vapply(x, fill_zero_locus, result, sep, maxploid, mat)
res <- vapply(x, fill_zero_locus, result, sep, maxploid, mat_type)
if (length(dim(res)) > 2){
redim <- dim(res)
dim(res) <- c(redim[1], redim[2]*redim[3])
Expand All @@ -1731,7 +1747,7 @@ generate_bruvo_mat <- function(x, maxploid, sep = "/", mat = FALSE){
} else {
colnames(res) <- colnames(x)
}
if (!mat){
if (length(mat_type) == 0) {
res[grep("NA", res)] <- NA_character_
}
rownames(res) <- rownames(x)
Expand Down
2 changes: 1 addition & 1 deletion R/methods.r
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ setMethod(
replen <- match_replen_to_loci(locNames(gen), replen)
ploid <- max(ploidy(gen))
popdf <- genind2df(gen, sep = "/", usepop = FALSE)
mat <- generate_bruvo_mat(popdf, maxploid = ploid, sep = "/", mat = TRUE)
mat <- generate_bruvo_mat(popdf, maxploid = ploid, sep = "/", mat_type = "numeric")
mat[is.na(mat)] <- 0
slot(.Object, "mat") <- mat
slot(.Object, "replen") <- replen
Expand Down
41 changes: 41 additions & 0 deletions tests/testthat/test-import.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,47 @@ test_that("not specifying a file for genind2genalex will generate a tempfile", {
expect_is(read.genalex(f), "genclone")
})

test_that("genind2genalex() handles snp data appropriately", {
# context: https://github.com/grunwaldlab/poppr/issues/231
tmp <- tempfile(fileext = ".csv")
on.exit(unlink(tmp), add = TRUE)
x <- new("genind", tab = structure(c(NA, 2L, 2L, 2L, 2L, NA, 0L, 0L,
0L, 0L, NA, 2L, 2L, 2L, 2L, NA, 0L, 0L, 0L, 0L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 0L, 0L, 1L), .Dim = 5:6, .Dimnames = list(c("TT056001.trim",
"TT060001.trim", "TT062001.trim", "TT063001.trim", "TT064001.trim"
), c("loc87_pos30.A", "loc87_pos30.G", "loc106_pos31.G", "loc106_pos31.T",
"loc345_pos27.G", "loc345_pos27.T"))), loc.fac = structure(c(1L,
1L, 2L, 2L, 3L, 3L), .Label = c("loc87_pos30", "loc106_pos31",
"loc345_pos27"), class = "factor"), loc.n.all = c(loc87_pos30 = 2L,
loc106_pos31 = 2L, loc345_pos27 = 2L), all.names = list(loc87_pos30 = c("A",
"G"), loc106_pos31 = c("G", "T"), loc345_pos27 = c("G", "T")),
ploidy = c(2L, 2L, 2L, 2L, 2L), type = "codom", other = list(),
call = .local(x = x, i = i, j = j, loc = ..1, drop = drop),
pop = NULL, strata = NULL, hierarchy = NULL)
expect_output(genind2genalex(x, tmp), "Extracting the table ...")
y <- read.genalex(tmp)
expect_equal(genind2df(x, pop = FALSE), genind2df(y, pop = FALSE))
})

test_that("fill_zero() works with character and numeric data", {
char <- "A"
num <- "13"

# Default
expect_equal(fill_zero(char, 2), "0/A")
expect_equal(fill_zero(num, 2), "0/13")
expect_equal(fill_zero(char, 3, character(0)), "0/0/A")
expect_equal(fill_zero(num, 3, character(0)), "0/0/13")

# As character vector
expect_equal(fill_zero(char, 3, "character"), c("0", "0", "A"))
expect_equal(fill_zero(num, 3, "character"), c("0", "0", "13"))

# As numeric vector
expect_equal(expect_warning(fill_zero(char, 3, "numeric")), c(0, 0, NA_real_))
expect_equal(fill_zero(num, 3, "numeric"), c(0.0, 0.0, 13.0))
})

test_that("genind2genalex will prevent a file from being overwritten", {
skip_on_cran()
f <- tempfile()
Expand Down

0 comments on commit 9259a28

Please sign in to comment.