From 94b105836081f93dbe875526f763c64bbaa7a597 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Priv=C3=A9?= <florian.prive.21@gmail.com>
Date: Sun, 31 Mar 2024 08:36:42 +0200
Subject: [PATCH] improve doc

---
 R/read-bgen.R       | 30 ++++++++++++++++--------------
 man/snp_prodBGEN.Rd |  4 ++--
 man/snp_readBGEN.Rd | 20 +++++++++++---------
 3 files changed, 29 insertions(+), 25 deletions(-)
diff --git a/R/read-bgen.R b/R/read-bgen.R
index ff4ab958..56b9db53 100644
--- a/R/read-bgen.R
+++ b/R/read-bgen.R
@@ -88,7 +88,7 @@ check_bgen_format <- function(bgenfile) {
 #' Function to read the UK Biobank BGEN files into a [bigSNP][bigSNP-class].
 #'
 #' For more information on this format, please visit
-#' \href{https://bitbucket.org/gavinband/bgen/}{BGEN webpage}.
+#' \href{https://code.enkre.net/bgen}{BGEN webpage}.
 #'
 #' This function is designed to read UK Biobank imputation files. This assumes
 #' that variants have been compressed with zlib, that there are only 2 possible
@@ -110,11 +110,11 @@ check_bgen_format <- function(bgenfile) {
 #' @param backingfile The path (without extension) for the backing files (".bk"
 #'   and ".rds") that are created by this function for storing the
 #'   [bigSNP][bigSNP-class] object.
-#' @param list_snp_id List (same length as the number of BGEN files) of
-#'  character vector of SNP IDs to read. These should be in the form
-#'  `"<chr>_<pos>_<a1>_<a2>"` (e.g. `"1_88169_C_T"` or `"01_88169_C_T"`).
-#'  If you have one BGEN file only, just wrap your vector of IDs with `list()`.
-#'  **This function assumes that these IDs are uniquely identifying variants.**
+#' @param list_snp_id List of character vectors of SNP IDs to read, with one
+#'   vector per BGEN file. Each SNP ID should be in the form
+#'   `"<chr>_<pos>_<a1>_<a2>"` (e.g. `"1_88169_C_T"` or `"01_88169_C_T"`).
+#'   If you have one BGEN file only, just wrap your vector of IDs with `list()`.
+#'   **This function assumes that these IDs are uniquely identifying variants.**
 #' @param bgi_dir Directory of index files. Default is the same as `bgenfiles`.
 #' @param ind_row An optional vector of the row indices (individuals) that
 #'   are used. If not specified, all rows are used. **Don't use negative indices.**
@@ -129,15 +129,17 @@ check_bgen_format <- function(bgenfile) {
 #'   (similar to PLINK option '`--hard-call-threshold random`').
 #'
 #' @return The path to the RDS file `<backingfile>.rds` that stores the `bigSNP`
-#'   object created by this function. Note that this function creates another
-#'   file (*.bk*) which stores the values of the FBM (`$genotypes`). The `$map`
-#'   component of the `bigSNP` object stores some information on the variants
-#'   (including allele frequencies and INFO scores computed from the probabilities).
+#'   object created by this function.\cr
+#'   Note that this function creates another file (*.bk*) which stores the values
+#'   of the FBM (`$genotypes`). The rows correspond to the order of `ind_row`;
+#'   the columns to the order of `list_snp_id`. The `$map` component of the
+#'   `bigSNP` object stores some information on the variants (including allele
+#'   frequencies and INFO scores computed from the imputation probabilities).
 #'   However, it does not have a `$fam` component; you should use the individual
-#'   IDs in the *.sample* file (filtered with `ind_row`) to add external information
-#'   on the individuals.\cr
-#' __You shouldn't read from BGEN files more than once.__ Instead, use
-#' [snp_attach] to load the "bigSNP" object in any R session from backing files.
+#'   IDs in the *.sample* file (filtered with `ind_row`) to add external
+#'   information on the individuals.\cr
+#'   __You shouldn't read from BGEN files more than once.__ Instead, use
+#'   [snp_attach] to load the "bigSNP" object in any R session from backing files.
 #'
 #' @importFrom magrittr %>%
 #'
diff --git a/man/snp_prodBGEN.Rd b/man/snp_prodBGEN.Rd
index 853c7d53..9b1142e7 100644
--- a/man/snp_prodBGEN.Rd
+++ b/man/snp_prodBGEN.Rd
@@ -21,8 +21,8 @@ The corresponding ".bgen.bgi" index files must exist.}
 
 \item{beta}{A matrix (or a vector), with rows corresponding to \code{list_snp_id}.}
 
-\item{list_snp_id}{List (same length as the number of BGEN files) of
-character vector of SNP IDs to read. These should be in the form
+\item{list_snp_id}{List of character vectors of SNP IDs to read, with one
+vector per BGEN file. Each SNP ID should be in the form
 \code{"<chr>_<pos>_<a1>_<a2>"} (e.g. \code{"1_88169_C_T"} or \code{"01_88169_C_T"}).
 If you have one BGEN file only, just wrap your vector of IDs with \code{list()}.
 \strong{This function assumes that these IDs are uniquely identifying variants.}}
diff --git a/man/snp_readBGEN.Rd b/man/snp_readBGEN.Rd
index b03e8128..e46db433 100644
--- a/man/snp_readBGEN.Rd
+++ b/man/snp_readBGEN.Rd
@@ -22,8 +22,8 @@ The corresponding ".bgen.bgi" index files must exist.}
 and ".rds") that are created by this function for storing the
 \link[=bigSNP-class]{bigSNP} object.}
 
-\item{list_snp_id}{List (same length as the number of BGEN files) of
-character vector of SNP IDs to read. These should be in the form
+\item{list_snp_id}{List of character vectors of SNP IDs to read, with one
+vector per BGEN file. Each SNP ID should be in the form
 \code{"<chr>_<pos>_<a1>_<a2>"} (e.g. \code{"1_88169_C_T"} or \code{"01_88169_C_T"}).
 If you have one BGEN file only, just wrap your vector of IDs with \code{list()}.
 \strong{This function assumes that these IDs are uniquely identifying variants.}}
@@ -47,13 +47,15 @@ You may use \code{\link[=nb_cores]{nb_cores()}}.}
 }
 \value{
 The path to the RDS file \verb{<backingfile>.rds} that stores the \code{bigSNP}
-object created by this function. Note that this function creates another
-file (\emph{.bk}) which stores the values of the FBM (\verb{$genotypes}). The \verb{$map}
-component of the \code{bigSNP} object stores some information on the variants
-(including allele frequencies and INFO scores computed from the probabilities).
+object created by this function.\cr
+Note that this function creates another file (\emph{.bk}) which stores the values
+of the FBM (\verb{$genotypes}). The rows correspond to the order of \code{ind_row};
+the columns to the order of \code{list_snp_id}. The \verb{$map} component of the
+\code{bigSNP} object stores some information on the variants (including allele
+frequencies and INFO scores computed from the imputation probabilities).
 However, it does not have a \verb{$fam} component; you should use the individual
-IDs in the \emph{.sample} file (filtered with \code{ind_row}) to add external information
-on the individuals.\cr
+IDs in the \emph{.sample} file (filtered with \code{ind_row}) to add external
+information on the individuals.\cr
 \strong{You shouldn't read from BGEN files more than once.} Instead, use
 \link{snp_attach} to load the "bigSNP" object in any R session from backing files.
 }
@@ -62,7 +64,7 @@ Function to read the UK Biobank BGEN files into a \link[=bigSNP-class]{bigSNP}.
 }
 \details{
 For more information on this format, please visit
-\href{https://bitbucket.org/gavinband/bgen/}{BGEN webpage}.
+\href{https://code.enkre.net/bgen}{BGEN webpage}.
 
 This function is designed to read UK Biobank imputation files. This assumes
 that variants have been compressed with zlib, that there are only 2 possible