From 94b105836081f93dbe875526f763c64bbaa7a597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Priv=C3=A9?= Date: Sun, 31 Mar 2024 08:36:42 +0200 Subject: [PATCH] improve doc --- R/read-bgen.R | 30 ++++++++++++++++-------------- man/snp_prodBGEN.Rd | 4 ++-- man/snp_readBGEN.Rd | 20 +++++++++++--------- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/R/read-bgen.R b/R/read-bgen.R index ff4ab958..56b9db53 100644 --- a/R/read-bgen.R +++ b/R/read-bgen.R @@ -88,7 +88,7 @@ check_bgen_format <- function(bgenfile) { #' Function to read the UK Biobank BGEN files into a [bigSNP][bigSNP-class]. #' #' For more information on this format, please visit -#' \href{https://bitbucket.org/gavinband/bgen/}{BGEN webpage}. +#' \href{https://code.enkre.net/bgen}{BGEN webpage}. #' #' This function is designed to read UK Biobank imputation files. This assumes #' that variants have been compressed with zlib, that there are only 2 possible @@ -110,11 +110,11 @@ check_bgen_format <- function(bgenfile) { #' @param backingfile The path (without extension) for the backing files (".bk" #' and ".rds") that are created by this function for storing the #' [bigSNP][bigSNP-class] object. -#' @param list_snp_id List (same length as the number of BGEN files) of -#' character vector of SNP IDs to read. These should be in the form -#' `"<chr>_<pos>_<a1>_<a2>"` (e.g. `"1_88169_C_T"` or `"01_88169_C_T"`). -#' If you have one BGEN file only, just wrap your vector of IDs with `list()`. -#' **This function assumes that these IDs are uniquely identifying variants.** +#' @param list_snp_id List of character vectors of SNP IDs to read, with one +#' vector per BGEN file. Each SNP ID should be in the form +#' `"<chr>_<pos>_<a1>_<a2>"` (e.g. `"1_88169_C_T"` or `"01_88169_C_T"`). +#' If you have one BGEN file only, just wrap your vector of IDs with `list()`. +#' **This function assumes that these IDs are uniquely identifying variants.** #' @param bgi_dir Directory of index files. Default is the same as `bgenfiles`.
#' @param ind_row An optional vector of the row indices (individuals) that #' are used. If not specified, all rows are used. **Don't use negative indices.** @@ -129,15 +129,17 @@ check_bgen_format <- function(bgenfile) { #' (similar to PLINK option '`--hard-call-threshold random`'). #' #' @return The path to the RDS file `<backingfile>.rds` that stores the `bigSNP` -#' object created by this function. Note that this function creates another -#' file (*.bk*) which stores the values of the FBM (`$genotypes`). The `$map` -#' component of the `bigSNP` object stores some information on the variants -#' (including allele frequencies and INFO scores computed from the probabilities). +#' object created by this function.\cr +#' Note that this function creates another file (*.bk*) which stores the values +#' of the FBM (`$genotypes`). The rows correspond to the order of `ind_row`; +#' the columns to the order of `list_snp_id`. The `$map` component of the +#' `bigSNP` object stores some information on the variants (including allele +#' frequencies and INFO scores computed from the imputation probabilities). #' However, it does not have a `$fam` component; you should use the individual -#' IDs in the *.sample* file (filtered with `ind_row`) to add external information -#' on the individuals.\cr -#' __You shouldn't read from BGEN files more than once.__ Instead, use -#' [snp_attach] to load the "bigSNP" object in any R session from backing files. +#' IDs in the *.sample* file (filtered with `ind_row`) to add external +#' information on the individuals.\cr +#' __You shouldn't read from BGEN files more than once.__ Instead, use +#' [snp_attach] to load the "bigSNP" object in any R session from backing files.
#' #' @importFrom magrittr %>% #' diff --git a/man/snp_prodBGEN.Rd b/man/snp_prodBGEN.Rd index 853c7d53..9b1142e7 100644 --- a/man/snp_prodBGEN.Rd +++ b/man/snp_prodBGEN.Rd @@ -21,8 +21,8 @@ The corresponding ".bgen.bgi" index files must exist.} \item{beta}{A matrix (or a vector), with rows corresponding to \code{list_snp_id}.} -\item{list_snp_id}{List (same length as the number of BGEN files) of -character vector of SNP IDs to read. These should be in the form +\item{list_snp_id}{List of character vectors of SNP IDs to read, with one +vector per BGEN file. Each SNP ID should be in the form \code{"<chr>_<pos>_<a1>_<a2>"} (e.g. \code{"1_88169_C_T"} or \code{"01_88169_C_T"}). If you have one BGEN file only, just wrap your vector of IDs with \code{list()}. \strong{This function assumes that these IDs are uniquely identifying variants.}} diff --git a/man/snp_readBGEN.Rd b/man/snp_readBGEN.Rd index b03e8128..e46db433 100644 --- a/man/snp_readBGEN.Rd +++ b/man/snp_readBGEN.Rd @@ -22,8 +22,8 @@ The corresponding ".bgen.bgi" index files must exist.} and ".rds") that are created by this function for storing the \link[=bigSNP-class]{bigSNP} object.} -\item{list_snp_id}{List (same length as the number of BGEN files) of -character vector of SNP IDs to read. These should be in the form +\item{list_snp_id}{List of character vectors of SNP IDs to read, with one +vector per BGEN file. Each SNP ID should be in the form \code{"<chr>_<pos>_<a1>_<a2>"} (e.g. \code{"1_88169_C_T"} or \code{"01_88169_C_T"}). If you have one BGEN file only, just wrap your vector of IDs with \code{list()}. \strong{This function assumes that these IDs are uniquely identifying variants.}} @@ -47,13 +47,15 @@ You may use \code{\link[=nb_cores]{nb_cores()}}.} } \value{ The path to the RDS file \verb{<backingfile>.rds} that stores the \code{bigSNP} -object created by this function. Note that this function creates another -file (\emph{.bk}) which stores the values of the FBM (\verb{$genotypes}).
The \verb{$map} -component of the \code{bigSNP} object stores some information on the variants -(including allele frequencies and INFO scores computed from the probabilities). +object created by this function.\cr +Note that this function creates another file (\emph{.bk}) which stores the values +of the FBM (\verb{$genotypes}). The rows correspond to the order of \code{ind_row}; +the columns to the order of \code{list_snp_id}. The \verb{$map} component of the +\code{bigSNP} object stores some information on the variants (including allele +frequencies and INFO scores computed from the imputation probabilities). However, it does not have a \verb{$fam} component; you should use the individual -IDs in the \emph{.sample} file (filtered with \code{ind_row}) to add external information -on the individuals.\cr +IDs in the \emph{.sample} file (filtered with \code{ind_row}) to add external +information on the individuals.\cr \strong{You shouldn't read from BGEN files more than once.} Instead, use \link{snp_attach} to load the "bigSNP" object in any R session from backing files. } @@ -62,7 +64,7 @@ Function to read the UK Biobank BGEN files into a \link[=bigSNP-class]{bigSNP}. } \details{ For more information on this format, please visit -\href{https://bitbucket.org/gavinband/bgen/}{BGEN webpage}. +\href{https://code.enkre.net/bgen}{BGEN webpage}. This function is designed to read UK Biobank imputation files. This assumes that variants have been compressed with zlib, that there are only 2 possible