-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
style: review filter functions style and documentation
- Loading branch information
Showing
8 changed files
with
161 additions
and
120 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,166 +1,206 @@ | ||
|
||
#' Reshape and simplify FORCIS data
#'
#' @description
#' Converts a FORCIS dataset from a wide format (one column per taxon) to a
#' long format. Only the taxa columns (as returned by `get_species_names()`)
#' and the columns returned by `get_required_columns()` are kept. The taxa
#' columns are then stacked into two columns: `taxa` (the former column names)
#' and `counts` (the former cell values).
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset, except for CPR North data.
#'
#' @return A `data.frame` reshaped in a long format, in which the columns
#'   `taxa` and `counts` replace the original taxa columns.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

reshape_forcis <- function(data) {

  if (get_data_type(data) == "CPR North") {
    stop("This function is not designed to work with 'CPR North' data",
         call. = FALSE)
  }

  taxa_cols    <- get_species_names(data)
  metadat_cols <- get_required_columns()

  # Both selections are external character vectors: wrap each of them in
  # all_of() so that tidyselect never mistakes them for columns of `data`
  # (passing a bare external vector to select() is deprecated).
  data %>%
    select(all_of(taxa_cols), all_of(metadat_cols)) %>%
    pivot_longer(all_of(taxa_cols),
                 names_to  = "taxa",
                 values_to = "counts")
}
|
||
|
||
|
||
#' Filter FORCIS data by year of sampling
#'
#' @description
#' This function can be used to filter FORCIS data by year of sampling.
#' For 'Sediment trap' data the year is read from the column
#' `sample_date_time_start` (the part before the first space); for any other
#' data type it is read from the column `profile_date_time`. In both cases the
#' date is parsed with `dmy()`, i.e. as day-month-year. Rows with a missing
#' date are removed.
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset.
#'
#' @param years a `numeric` containing one or several years.
#'
#' @return A `data.frame` containing a subset of `data` for the desired years.
#'   Columns are returned unchanged.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

filter_by_year <- function(data, years) {

  year_vector <- as.numeric(years)

  # The year is computed inside filter() rather than stored in temporary
  # columns: nothing has to be dropped afterwards, and columns of `data` that
  # happen to be named `year` are neither overwritten nor removed.
  if (get_data_type(data) == "Sediment trap") {

    # The gsub() call keeps only the text before the first space, which is
    # then parsed as a day-month-year date.
    filtered_dat <- data %>%
      filter(!is.na(.data$sample_date_time_start)) %>%
      filter(year(dmy(gsub(' .*', '', .data$sample_date_time_start))) %in%
               year_vector)

  } else {

    filtered_dat <- data %>%
      filter(!is.na(.data$profile_date_time)) %>%
      filter(year(dmy(.data$profile_date_time)) %in% year_vector)
  }

  filtered_dat
}
|
||
|
||
#' Filter FORCIS data by month of sampling
#'
#' @description
#' This function can be used to filter FORCIS data by month of sampling.
#' For 'Sediment trap' data the month is read from the column
#' `sample_date_time_start` (the part before the first space); for any other
#' data type it is read from the column `profile_date_time`. In both cases the
#' date is parsed with `dmy()`, i.e. as day-month-year. Rows with a missing
#' date are removed.
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset.
#'
#' @param months a `numeric` containing one or several months.
#'
#' @return A `data.frame` containing a subset of `data` for the desired months.
#'   Columns are returned unchanged.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

filter_by_month <- function(data, months) {

  month_vector <- as.numeric(months)

  # The month is computed inside filter() rather than stored in temporary
  # columns: nothing has to be dropped afterwards, and columns of `data` that
  # happen to be named `month` are neither overwritten nor removed.
  if (get_data_type(data) == "Sediment trap") {

    # The gsub() call keeps only the text before the first space, which is
    # then parsed as a day-month-year date.
    filtered_dat <- data %>%
      filter(!is.na(.data$sample_date_time_start)) %>%
      filter(month(dmy(gsub(' .*', '', .data$sample_date_time_start))) %in%
               month_vector)

  } else {

    filtered_dat <- data %>%
      filter(!is.na(.data$profile_date_time)) %>%
      filter(month(dmy(.data$profile_date_time)) %in% month_vector)
  }

  filtered_dat
}
|
||
|
||
#' Filter FORCIS data by a spatial bounding box
#'
#' @description
#' This function can be used to filter FORCIS data by a spatial bounding box.
#' A row is kept when its `site_lat_start_decimal` and `site_lon_start_decimal`
#' values both fall inside the box, bounds included. Rows with a missing
#' latitude or longitude are removed.
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset.
#'
#' @param coord_square a vector of four `numeric` values defining a rectangular
#'   bounding box. Values must follow this order: minimum latitude, minimum
#'   longitude, maximum latitude, and maximum longitude. As each bound is
#'   applied as a plain `>=` / `<=` comparison, no row is returned when a
#'   minimum is greater than the matching maximum.
#'
#' @return A `data.frame` containing a subset of `data`.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

filter_by_bbox <- function(data, coord_square) {

  # A shorter vector would silently yield NA bounds and an empty result.
  if (length(coord_square) != 4) {
    stop("Argument 'coord_square' must contain four values: minimum latitude, ",
         "minimum longitude, maximum latitude, and maximum longitude",
         call. = FALSE)
  }

  min_lat  <- coord_square[1]
  min_long <- coord_square[2]
  max_lat  <- coord_square[3]
  # The maximum longitude is the fourth element (the third one is the maximum
  # latitude).
  max_long <- coord_square[4]

  data %>%
    filter(!is.na(.data$site_lat_start_decimal)) %>%
    filter(!is.na(.data$site_lon_start_decimal)) %>%
    filter(.data$site_lat_start_decimal >= min_lat &
           .data$site_lat_start_decimal <= max_lat &
           .data$site_lon_start_decimal >= min_long &
           .data$site_lon_start_decimal <= max_long)
}
|
||
#' Filter FORCIS data by species
#'
#' @description
#' This function can be used to filter FORCIS data in long format by species:
#' only the rows whose `taxa` value is listed in `species` are kept.
#' Optionally, rows with a missing `counts` value are also removed.
#'
#' @param data a `data.frame`, i.e. a FORCIS dataset in long format (see
#'   `reshape_forcis()`), except for CPR North data.
#'
#' @param species a `character` vector listing species of interest.
#'
#' @param rm_na a `logical` value. If `FALSE` (default), keeps taxa with `NA`
#'   counts. If `TRUE`, rows with `NA` counts are removed.
#'
#' @return A `data.frame` containing a subset of `data`.
#'
#' @export
#'
#' @examples
#' ## ADD EXAMPLE ----

filter_by_species <- function(data, species, rm_na = FALSE) {

  # The data type is checked first: 'reshape_forcis()' also rejects CPR North
  # data, so advising users to reshape such data would be misleading.
  if (get_data_type(data) == "CPR North") {
    stop("This function is not designed to work with 'CPR North' data",
         call. = FALSE)
  }

  # Taxa columns are still present: the data have not been reshaped.
  if (length(get_species_names(data)) > 0) {
    stop("This function requires data in long format. Please use the function ",
         "'reshape_forcis()'", call. = FALSE)
  }

  my_species <- as.character(species)

  filtered_dat <- data %>%
    filter(.data$taxa %in% my_species)

  if (rm_na) {

    filtered_dat <- filtered_dat %>%
      filter(!is.na(.data$counts))
  }

  filtered_dat
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.