style: review filter functions style and documentation

FRBCesab · Jan 18, 2024 · 7218e5e · 7218e5e
1 parent efe5821
commit 7218e5e
Show file tree

Hide file tree

Showing 8 changed files with 161 additions and 120 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -4,7 +4,7 @@ export(compute_abundances)
 export(compute_concentration_bins_CPR_n)
 export(compute_concentrations)
 export(compute_frequencies)
-export(filter_by_coordinates)
+export(filter_by_bbox)
 export(filter_by_month)
 export(filter_by_species)
 export(filter_by_year)

diff --git a/R/filter.R b/R/filter.R
@@ -1,166 +1,206 @@
-
-#' Reshape and simplify forcis data
+#' Reshape and simplify FORCIS data
 #'
-#' @param data forcis data
+#' @description
+#' Stacks the taxa columns of a FORCIS dataset into `taxa` and `counts` columns.
+#' 
+#' @param data a `data.frame`, i.e. a FORCIS dataset, except for CPR North data.
 #'
-#' @return A `data.frame`
+#' @return A `data.frame` in long format with `taxa` and `counts` columns.
+#' 
 #' @export
 #'
 #' @examples
 #' ## ADD EXAMPLE ----
 
-reshape_forcis <- function(data){
+reshape_forcis <- function(data) {
 
-  if (get_data_type(data) %in% c("CPR North")) {
-    stop("This function is not designed to work with 'CPR North' data", call. = FALSE) 
+  if (get_data_type(data) == "CPR North") {
+    stop("This function is not designed to work with 'CPR North' data", 
+         call. = FALSE) 
   }
 
-  taxa_cols <- get_species_names(data) 
+  taxa_cols    <- get_species_names(data) 
   metadat_cols <- get_required_columns()
+
   dat_reshaped <- data %>% 
-    select(all_of(taxa_cols),metadat_cols) %>% 
+    select(all_of(taxa_cols), all_of(metadat_cols)) %>% 
     pivot_longer(all_of(taxa_cols), 
-                 names_to  = 'taxa', 
-                 values_to = 'counts')
-  return(dat_reshaped)
+                 names_to  = "taxa", 
+                 values_to = "counts")
+
+  dat_reshaped
 }
 
 
 
-#' Filter forcis data by year of sampling
+#' Filter FORCIS data by year of sampling
 #'
-#' @param data forcis data
-#' @param years numeric vector of selected years
+#' @description
+#' This function can be used to filter FORCIS data by year of sampling.
+#' 
+#' @param data a `data.frame`, i.e. a FORCIS dataset.
+#' 
+#' @param years a `numeric` containing one or several years.
 #'
-#' @return A `data.frame`.
+#' @return A `data.frame` containing a subset of `data` for the desired years.
+#' 
 #' @export
 #'
 #' @examples
 #' ## ADD EXAMPLE ----
 
-filter_by_year <- function(data,years){
+filter_by_year <- function(data, years) {
+
   year_vector <- as.numeric(years)
 
-  if (get_data_type(data)=="Sediment trap"){
+  if (get_data_type(data) == "Sediment trap") {
+
     filtered_dat <- data %>%
-      filter(! is.na(.data$sample_date_time_start)) %>%
-      mutate(new_sample_date_start=gsub(' .*','', .data$sample_date_time_start)) %>% 
+      filter(!is.na(.data$sample_date_time_start)) %>%
+      mutate(new_sample_date_start = gsub(" .*", "", 
+                                          .data$sample_date_time_start)) %>% 
       mutate(new_sample_date_start = dmy(.data$new_sample_date_start)) %>%
-      mutate(year=year(.data$new_sample_date_start)) %>% 
+      mutate(year = year(.data$new_sample_date_start)) %>% 
       filter(.data$year %in% year_vector) %>%
-      select(-c(.data$year,.data$new_sample_date_start))
-    return(filtered_dat)
+      select(-c("year", "new_sample_date_start"))
 
   } else {
+
     filtered_dat <- data %>% 
-      filter(! is.na(.data$profile_date_time)) %>% 
+      filter(!is.na(.data$profile_date_time)) %>% 
       mutate(new_profile_date_time = dmy(.data$profile_date_time)) %>% 
-      mutate(year=year(.data$new_profile_date_time)) %>% 
+      mutate(year = year(.data$new_profile_date_time)) %>% 
       filter(.data$year %in% year_vector) %>% 
-      select(-c(.data$year,.data$new_profile_date_time))
-    return(filtered_dat)
+      select(-c("year", "new_profile_date_time"))
   }
+
+  filtered_dat
 }
 
 
-#' Filter forcis data by month of sampling
+
+#' Filter FORCIS data by month of sampling
 #'
-#' @param data forcis data 
-#' @param months  numeric vector of selected months
+#' @description
+#' This function can be used to filter FORCIS data by month of sampling.
+#' 
+#' @param data a `data.frame`, i.e. a FORCIS dataset.
+#' 
+#' @param months a `numeric` containing one or several months.
 #'
-#' @return A `data.frame`.
+#' @return A `data.frame` containing a subset of `data` for the desired months.
+#' 
 #' @export
 #' 
 #' @examples
 #' ## ADD EXAMPLE ----
 
-filter_by_month <- function(data,months){
+filter_by_month <- function(data, months) {
 
   month_vector <- as.numeric(months)
-  if (get_data_type(data)=="Sediment trap"){
+
+  if (get_data_type(data) == "Sediment trap") {
+
     filtered_dat <- data %>%
-      filter(! is.na(.data$sample_date_time_start)) %>%
-      mutate(new_sample_date_start=gsub(' .*','', .data$sample_date_time_start)) %>% 
+      filter(!is.na(.data$sample_date_time_start)) %>%
+      mutate(new_sample_date_start = gsub(" .*", "", 
+                                          .data$sample_date_time_start)) %>% 
       mutate(new_sample_date_start = dmy(.data$new_sample_date_start)) %>%
-      mutate(month=month(.data$new_sample_date_start)) %>% 
+      mutate(month = month(.data$new_sample_date_start)) %>% 
       filter(.data$month %in% month_vector) %>%
-      select(-c(.data$month,.data$new_sample_date_start))
+      select(-c("month", "new_sample_date_start"))
-    return(filtered_dat)
 
   } else {
+
     filtered_dat <- data %>% 
-      filter(! is.na(.data$profile_date_time)) %>% 
-      mutate(new_profile_date_time =dmy(.data$profile_date_time)) %>% 
-      mutate(month=month(.data$new_profile_date_time)) %>% 
+      filter(!is.na(.data$profile_date_time)) %>% 
+      mutate(new_profile_date_time = dmy(.data$profile_date_time)) %>% 
+      mutate(month = month(.data$new_profile_date_time)) %>% 
-      filter(.data$month %in% month_vector)%>% 
+      filter(.data$month %in% month_vector) %>% 
-      select(-c(.data$month,.data$new_profile_date_time))
-    
-    return(filtered_dat)
-  } 
+      select(-c("month", "new_profile_date_time"))
+  }
+
+  filtered_dat
 }
 
 
-#' Filter forcis data by coordinate square 
+#' Filter FORCIS data by a spatial bounding box 
 #'
-#' @param data forcis data 
-#' @param coord_square a numeric vector containing in this order minimum latitute,
-#' minimum longitude, maximum latitude, maximum longitude
+#' @description
+#' This function can be used to filter FORCIS data by a spatial bounding box.
+#' 
+#' @param data a `data.frame`, i.e. a FORCIS dataset.
+#' 
+#' @param coord_square a vector of four `numeric` values defining a square 
+#'   bounding box. Values must follow this order: minimum latitude, minimum 
+#'   longitude, maximum latitude, and maximum longitude.
 #'
-#' @return A `data.frame`.
+#' @return A `data.frame` containing the rows of `data` inside the bounding box.
+#' 
 #' @export
 #'
 #' @examples
 #' ## ADD EXAMPLE ----
 
-filter_by_coordinates <- function(data, coord_square){
+filter_by_bbox <- function(data, coord_square) {
 
-  min_lat <- coord_square[1] 
+  min_lat  <- coord_square[1] 
   min_long <- coord_square[2] 
-  max_lat <- coord_square[3]
+  max_lat  <- coord_square[3]
-  max_long <- coord_square[3]
+  max_long <- coord_square[4]
 
-  filtered_dat <- data %>% 
-    filter(! is.na(.data$site_lat_start_decimal)) %>% 
-    filter(! is.na (.data$site_lon_start_decimal)) %>%   
-    filter(.data$site_lat_start_decimal>= min_lat &
-             .data$site_lat_start_decimal <=max_lat &
-             .data$site_lon_start_decimal>= min_long &
-             .data$site_lon_start_decimal <=max_long)
-
-  return(filtered_dat)
+  data %>% 
+    filter(!is.na(.data$site_lat_start_decimal)) %>% 
+    filter(!is.na(.data$site_lon_start_decimal)) %>%   
+    filter(.data$site_lat_start_decimal >= min_lat &
+           .data$site_lat_start_decimal <= max_lat &
+           .data$site_lon_start_decimal >= min_long &
+           .data$site_lon_start_decimal <= max_long)
 }
 
-#' Filter forcis data by species 
+
+
+#' Filter FORCIS data by species 
 #'
-#' @param data forcis data in long format, except for CPR North data
-#' @param species a character vector listing species of interest
-#' @param remove_NAs logical, If FALSE, retains all taxa including those with NA counts
-#' @return A `data.frame`
+#' @description
+#' This function can be used to filter long-format FORCIS data by species.
+#' 
+#' @param data a `data.frame`, i.e. long-format FORCIS data (not CPR North).
+#' 
+#' @param species a `character` vector listing species of interest.
+#' 
+#' @param rm_na a `logical` value. If `FALSE`, keeps taxa with `NA` counts.
+#' 
+#' @return A `data.frame` containing a subset of `data`.
+#' 
 #' @export
 #'
 #' @examples
 #' ## ADD EXAMPLE ----
 
-filter_by_species <- function (data,species, remove_NAs=TRUE ){
-  my_species <- as.character(species)
+filter_by_species <- function (data, species, rm_na = FALSE) {
 
-  taxa_cols <- get_species_names(data) 
+  my_species <- as.character(species)
+  taxa_cols  <- get_species_names(data) 
 
-  if (length(taxa_cols)>0) {
-    stop("This function requires data in long format", call. = FALSE) 
+  if (length(taxa_cols) > 0) {
+    stop("This function requires data in long format. Please use the function ",
+         "'reshape_forcis()'", call. = FALSE) 
   }
 
-  if (get_data_type(data) %in% c("CPR North")) {
-    stop("This function is not designed to work with 'CPR North' data", call. = FALSE) 
+  if (get_data_type(data) == "CPR North") {
+    stop("This function is not designed to work with 'CPR North' data", 
+         call. = FALSE) 
   }
+
   filtered_dat <- data %>% 
     filter(.data$taxa %in% my_species)
 
-  if(remove_NAs) {
+  if (rm_na) {
 
     filtered_dat <- filtered_dat %>% 
-      filter(! is.na(.data$counts))
+      filter(!is.na(.data$counts))
   }
 
-  return(filtered_dat)
+  filtered_dat
 }
diff --git a/man/filter_by_bbox.Rd b/man/filter_by_bbox.Rd
diff --git a/man/filter_by_coordinates.Rd b/man/filter_by_coordinates.Rd
diff --git a/man/filter_by_month.Rd b/man/filter_by_month.Rd
diff --git a/man/filter_by_species.Rd b/man/filter_by_species.Rd