Skip to content

Commit

Permalink
style: review filter functions style and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
ahasverus committed Jan 18, 2024
1 parent efe5821 commit 7218e5e
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 120 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export(compute_abundances)
export(compute_concentration_bins_CPR_n)
export(compute_concentrations)
export(compute_frequencies)
export(filter_by_coordinates)
export(filter_by_bbox)
export(filter_by_month)
export(filter_by_species)
export(filter_by_year)
Expand Down
190 changes: 115 additions & 75 deletions R/filter.R
Original file line number Diff line number Diff line change
@@ -1,166 +1,206 @@

#' Reshape and simplify FORCIS data
#'
#' @description
#' This function keeps the species columns (as returned by
#' `get_species_names()`) and the columns returned by `get_required_columns()`,
#' then pivots the species columns into a long format: one row per original
#' row and species.
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset, except for CPR North data.
#'
#' @return A `data.frame` reshaped in a long format. Species names are stored
#'   in the column `taxa` and the corresponding values in the column `counts`.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

reshape_forcis <- function(data) {

  if (get_data_type(data) == "CPR North") {
    stop("This function is not designed to work with 'CPR North' data",
         call. = FALSE)
  }

  taxa_cols    <- get_species_names(data)
  metadat_cols <- get_required_columns()

  # Both column sets are external character vectors: wrap each one in
  # all_of() so that tidyselect never mistakes them for column names of `data`
  # (passing a bare external vector to select() is deprecated).
  data %>%
    select(all_of(taxa_cols), all_of(metadat_cols)) %>%
    pivot_longer(all_of(taxa_cols),
                 names_to  = "taxa",
                 values_to = "counts")
}



#' Filter FORCIS data by year of sampling
#'
#' @description
#' This function can be used to filter FORCIS data by year of sampling. Rows
#' with a missing sampling date are dropped. For 'Sediment trap' data the date
#' is read from the column `sample_date_time_start` (only the text before the
#' first space is kept); for all other data types it is read from the column
#' `profile_date_time`. Dates are parsed with `dmy()`.
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset.
#'
#' @param years a `numeric` containing one or several years.
#'
#' @return A `data.frame` containing a subset of `data` for the desired years.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

filter_by_year <- function(data, years) {

  year_vector <- as.numeric(years)

  # The date is parsed inside filter(): no temporary column is added to (and
  # then removed from) `data`, so an existing `year` column is left untouched
  # and no `.data` pronoun is needed in a select() call.
  if (get_data_type(data) == "Sediment trap") {

    data %>%
      filter(!is.na(.data$sample_date_time_start)) %>%
      # Drop everything from the first space onwards before parsing the date
      filter(year(dmy(gsub(" .*", "", .data$sample_date_time_start))) %in%
               year_vector)

  } else {

    data %>%
      filter(!is.na(.data$profile_date_time)) %>%
      filter(year(dmy(.data$profile_date_time)) %in% year_vector)
  }
}


#' Filter FORCIS data by month of sampling
#'
#' @description
#' This function can be used to filter FORCIS data by month of sampling. Rows
#' with a missing sampling date are dropped. For 'Sediment trap' data the date
#' is read from the column `sample_date_time_start` (only the text before the
#' first space is kept); for all other data types it is read from the column
#' `profile_date_time`. Dates are parsed with `dmy()`.
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset.
#'
#' @param months a `numeric` containing one or several months.
#'
#' @return A `data.frame` containing a subset of `data` for the desired months.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

filter_by_month <- function(data, months) {

  month_vector <- as.numeric(months)

  # The date is parsed inside filter(): no temporary column is added to (and
  # then removed from) `data`, so an existing `month` column is left untouched
  # and no `.data` pronoun is needed in a select() call.
  if (get_data_type(data) == "Sediment trap") {

    data %>%
      filter(!is.na(.data$sample_date_time_start)) %>%
      # Drop everything from the first space onwards before parsing the date
      filter(month(dmy(gsub(" .*", "", .data$sample_date_time_start))) %in%
               month_vector)

  } else {

    data %>%
      filter(!is.na(.data$profile_date_time)) %>%
      filter(month(dmy(.data$profile_date_time)) %in% month_vector)
  }
}


#' Filter FORCIS data by a spatial bounding box
#'
#' @description
#' This function can be used to filter FORCIS data by a spatial bounding box.
#' Rows with a missing latitude (`site_lat_start_decimal`) or longitude
#' (`site_lon_start_decimal`) are dropped. Bounds are inclusive.
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset.
#'
#' @param coord_square a vector of four `numeric` values defining a rectangular
#'   bounding box. Values must follow this order: minimum latitude, minimum
#'   longitude, maximum latitude, and maximum longitude. A box whose minimum
#'   is greater than its maximum on either axis matches no rows.
#'
#' @return A `data.frame` containing the rows of `data` located inside the
#'   bounding box.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

filter_by_bbox <- function(data, coord_square) {

  if (!is.numeric(coord_square) || length(coord_square) != 4 ||
      anyNA(coord_square)) {
    stop("Argument 'coord_square' must be a numeric vector of length 4 ",
         "(min latitude, min longitude, max latitude, max longitude)",
         call. = FALSE)
  }

  min_lat  <- coord_square[1]
  min_long <- coord_square[2]
  max_lat  <- coord_square[3]
  # Maximum longitude is the 4th element (index 3 is the maximum latitude)
  max_long <- coord_square[4]

  data %>%
    filter(!is.na(.data$site_lat_start_decimal)) %>%
    filter(!is.na(.data$site_lon_start_decimal)) %>%
    filter(.data$site_lat_start_decimal >= min_lat &
             .data$site_lat_start_decimal <= max_lat &
             .data$site_lon_start_decimal >= min_long &
             .data$site_lon_start_decimal <= max_long)
}

#' Filter FORCIS data by species
#'
#' @description
#' This function can be used to keep only the rows of a long-format FORCIS
#' dataset whose `taxa` value is one of the species of interest. Rows with
#' missing `counts` can optionally be removed.
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset in long format (see
#'   `reshape_forcis()`), except for CPR North data.
#'
#' @param species a `character` vector listing species of interest.
#'
#' @param rm_na a `logical` value. If `TRUE`, rows with `NA` counts are
#'   removed. If `FALSE` (default), they are kept.
#'
#' @return A `data.frame` containing a subset of `data`.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

filter_by_species <- function(data, species, rm_na = FALSE) {

  wanted <- as.character(species)

  # Species columns are only present in the wide format
  if (length(get_species_names(data)) > 0) {
    stop("This function requires data in long format. Please use the function ",
         "'reshape_forcis()'", call. = FALSE)
  }

  if (get_data_type(data) == "CPR North") {
    stop("This function is not designed to work with 'CPR North' data",
         call. = FALSE)
  }

  species_dat <- data %>%
    filter(.data$taxa %in% wanted)

  if (!rm_na) {
    return(species_dat)
  }

  species_dat %>%
    filter(!is.na(.data$counts))
}
24 changes: 24 additions & 0 deletions man/filter_by_bbox.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 0 additions & 23 deletions man/filter_by_coordinates.Rd

This file was deleted.

10 changes: 5 additions & 5 deletions man/filter_by_month.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions man/filter_by_species.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7218e5e

Please sign in to comment.