Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trim whitespace at EOL #93

Merged
merged 2 commits into from
Jun 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 35 additions & 35 deletions R/cor_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#' @export
as_matrix.cor_df <- function(x, diagonal) {

# Separate rownames
row_name <- x$rowname
x <- x[, colnames(x) != "rowname"]
Expand All @@ -19,18 +19,18 @@ as_matrix.cor_df <- function(x, diagonal) {

#' @export
shave.cor_df <- function(x, upper = TRUE) {

# Separate rownames
row_name <- x$rowname
x <- x[, colnames(x) != "rowname"]

# Remove upper matrix
if (upper) {
x[upper.tri(x)] <- NA
} else {
x[lower.tri(x)] <- NA
}

# Reappend rownames and class
x <- first_col(x, row_name)
class(x) <- c("cor_df", class(x))
Expand All @@ -39,20 +39,20 @@ shave.cor_df <- function(x, upper = TRUE) {

#' @export
rearrange.cor_df <- function(x, method = "PCA", absolute = TRUE) {

# Convert to original matrix
m <- as_matrix(x, diagonal = 1)
if (absolute) abs(m)

if (absolute) abs(m)

if (method %in% c("BEA", "BEA_TSP", "PCA", "PCA_angle")) {
ord <- seriation::seriate(m, method = method)
} else {
ord <- seriation::seriate(dist(m), method = method)
}

ord <- seriation::get_order(ord)

# Arrange and return matrix
# "c(1, 1 + ..." to handle rowname column
x <- x[ord, c(1, 1 + ord)]
Expand All @@ -68,7 +68,7 @@ focus_.cor_df <- function(x, ..., .dots = NULL, mirror = FALSE) {
vars <- enquos(...)
row_name <- x$rowname
if(length(vars) > 0) {
x <- dplyr::select(x, !!! vars)
x <- dplyr::select(x, !!! vars)
} else {
x <- dplyr::select(x, .dots)
}
Expand All @@ -80,7 +80,7 @@ focus_.cor_df <- function(x, ..., .dots = NULL, mirror = FALSE) {
} else {
x <- first_col(x, row_name)
}

# Exclude these or others from the rows
vars <- x$rowname %in% vars
if (mirror) {
Expand All @@ -94,15 +94,15 @@ focus_.cor_df <- function(x, ..., .dots = NULL, mirror = FALSE) {

#' @export
focus_if.cor_df <- function(x, .predicate, ..., mirror = FALSE) {

# Identify which variables to keep
to_keep <- map_lgl(
x[, colnames(x) != "rowname"],
x[, colnames(x) != "rowname"],
.predicate, ...
)

to_keep <- names(to_keep)[!is.na(to_keep) & to_keep]

if (!length(to_keep)) {
stop("No variables were TRUE given the function.")
}
Expand All @@ -119,18 +119,18 @@ rplot.cor_df <- function(rdf,
colours = c("indianred2", "white", "skyblue1"),
print_cor = FALSE,
colors) {

if (!missing(colors))
colours <- colors

# Store order for factoring the variables
row_order <- rdf$rowname

# Convert data to relevant format for plotting
pd <- stretch(rdf, na.rm = TRUE)
pd <- stretch(rdf, na.rm = TRUE)
pd$size = abs(pd$r)
pd$label = fashion(pd$r)

plot_ <- list(
# Geoms
geom_point(shape = shape),
Expand All @@ -143,12 +143,12 @@ rplot.cor_df <- function(rdf,
if (legend) labs(colour = NULL),
if (!legend) theme(legend.position = "none")
)

ggplot(pd, aes_string(x = "x", y = "y", color = "r",
size = "size", alpha = "size",
label = "label")) +
plot_

# # plot
# ggplot(aes_string(x = "x", y = "y", color = "r",
# size = "size", alpha = "size",
Expand All @@ -157,15 +157,15 @@ rplot.cor_df <- function(rdf,
# scale_colour_gradientn(limits = c(-1, 1), colors = colours) +
# labs(x = "", y ="") +
# theme_classic()
#
#
# if (print_cor) {
# p <- p + geom_text(color = "black", size = 3, show.legend = FALSE)
# }
#
#
# if (!legend) {
# p <- p + theme(legend.position = "none")
# }
#
#
# p
}

Expand All @@ -177,31 +177,31 @@ network_plot.cor_df <- function(rdf,
repel = TRUE,
curved = TRUE,
colors) {

if (min_cor < 0 || min_cor > 1) {
stop ("min_cor must be a value ranging from zero to one.")
}

if (!missing(colors))
colours <- colors

rdf <- as_matrix(rdf, diagonal = 1)
distance <- sign(rdf) * (1 - abs(rdf))

# Use multidimensional Scaling to obtain x and y coordinates for points.
points <- data.frame(stats::cmdscale(abs(distance)))
colnames(points) <- c("x", "y")
points$id <- rownames(points)

# Create a proximity matrix of the paths to be plotted.
proximity <- abs(rdf)
proximity[upper.tri(proximity)] <- NA
diag(proximity) <- NA
proximity[proximity < min_cor] <- NA

# Produce a data frame of data needed for plotting the paths.
n_paths <- sum(!is.na(proximity))
paths <- data.frame(matrix(nrow = n_paths, ncol = 6))
paths <- data.frame(matrix(nrow = n_paths, ncol = 6))
colnames(paths) <- c("x", "y", "xend", "yend", "proximity", "sign")
path <- 1
for(row in 1:nrow(proximity)) {
Expand All @@ -218,17 +218,17 @@ network_plot.cor_df <- function(rdf,
}
}
}

plot_ <- list(
# For plotting paths
if (curved) geom_curve(data = paths,
aes(x = x, y = y, xend = xend, yend = yend,
alpha = proximity, size = proximity,
colour = proximity*sign)),
colour = proximity*sign)),
if (!curved) geom_segment(data = paths,
aes(x = x, y = y, xend = xend, yend = yend,
alpha = proximity, size = proximity,
colour = proximity*sign)),
colour = proximity*sign)),
scale_alpha(limits = c(0, 1)),
scale_size(limits = c(0, 1)),
scale_colour_gradientn(limits = c(-1, 1), colors = colours),
Expand Down
66 changes: 33 additions & 33 deletions R/correlate.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#' Correlation Data Frame
#'
#'
#' An implementation of stats::cor(), which returns a correlation data frame
#' rather than a matrix. See details below. Additional adjustment include the
#' use of pairwise deletion by default.
#'
#'
#' \itemize{
#' This function returns a correlation matrix as a correlation data frame in
#' the following format:
Expand All @@ -13,7 +13,7 @@
#' \item Standardized variances (the matrix diagonal) set to missing values by
#' default (\code{NA}) so they can be ignored in calculations.
#' }
#'
#'
#' @inheritParams stats::cor
#' @inheritParams as_cordf
#' @param quiet Set as TRUE to suppress message about `method` and `use`
Expand All @@ -24,24 +24,24 @@
#' \dontrun{
#' correlate(iris)
#' }
#'
#'
#' correlate(iris[-5])
#'
#'
#' correlate(mtcars)
#'
#'
#' \dontrun{
#'
#'
#' # Also supports DB backend and collects results into memory
#'
#'
#' library(sparklyr)
#' sc <- spark_connect(master = "local")
#' mtcars_tbl <- copy_to(sc, mtcars)
#' mtcars_tbl %>%
#' mtcars_tbl %>%
#' correlate(use = "pairwise.complete.obs", method = "spearman")
#' spark_disconnect(sc)
#'
#'
#' }
#'
#'
correlate <- function(x, y = NULL,
use = "pairwise.complete.obs",
method = "pearson",
Expand All @@ -60,7 +60,7 @@ correlate.default <- function(x, y = NULL,
if (!quiet)
message("\nCorrelation method: '", method, "'",
"\nMissing treated using: '", use, "'\n")

as_cordf(x, diagonal = diagonal)
}

Expand All @@ -75,61 +75,61 @@ correlate.tbl_sql <- function(x, y = NULL,
if(!is.null(y)) stop("y is not supported for tables with a SQL back-end")
if(!is.na(diagonal)) stop("Only NA's are supported for same field correlations")
df_cor <- NULL

if("tbl_spark" %in% class(x)){

if(!method %in% c("pearson", "spearman"))
stop("Only pearson or spearman methods are currently supported")

df_cor <- as_cordf(sparklyr::ml_corr(x, method = method))
}

if(is.null(df_cor)){

if(method != "pearson") stop("Only 'pearson' method is currently supported")

col_names <- colnames(x)

cols <- map_dfr(
col_names,
~ tibble(
x = .x,
x = .x,
y = col_names
))
combos <- map_chr(transpose(cols), ~ paste0(sort(c(.x$x, .x$y)), collapse = "_"))
cols$combos <- combos
unique_combos <- unique(combos)

f_cols <- map_dfr(unique_combos, ~ head(cols[cols$combos == .x, ], 1))

if(!all(unique(f_cols$x) == col_names)) stop("Not all variable combinations are present")
if(!all(unique(f_cols$y) == col_names)) stop("Not all variable combinations are present")

f_cols <- f_cols[f_cols$x != f_cols$y, ]

mnprod <- map(transpose(f_cols), ~expr(sum(!! sym(.x$x) * !! sym(.x$y), na.rm = TRUE)))
mnprod <- set_names(mnprod, f_cols$combos)

mnsum <- map(col_names, ~expr(sum(!! sym(.x), na.rm = TRUE)))
mnsum <- set_names(mnsum, paste0(col_names, "_sum"))

mntwo <- map(col_names, ~expr(sum(!! sym(.x) * !! sym(.x), na.rm = TRUE)))
mntwo <- set_names(mntwo, paste0(col_names, "_two"))
obs <- set_names(list(expr(n())), "obs")
db_totals <- collect(summarise(x, !!! c(mnsum, mntwo, mnprod, obs)))

f_cols$x_sum = paste0(f_cols$x, "_sum")
f_cols$y_sum = paste0(f_cols$y, "_sum")
f_cols$x_two = paste0(f_cols$x, "_two")
f_cols$y_two = paste0(f_cols$y, "_two")

l_cols <- transpose(f_cols)

top <- map(l_cols, ~ expr((obs * !! sym(.x$combos)) - (!! sym(.x$x_sum) * !! sym(.x$y_sum)) ))
bottom <- map(l_cols, ~ expr((sqrt(((obs * !! sym(.x$x_two)) - (!! sym(.x$x_sum) * !! sym(.x$x_sum))) * ((obs * !! sym(.x$y_two)) - (!! sym(.x$y_sum) * !! sym(.x$y_sum)))))))
f_cor <- map(seq_along(top), ~expr(!! top[[.x]] / !! bottom[[.x]]))
f_cor <- set_names(f_cor, f_cols$combos)

f_cors <- summarise(db_totals, !!! f_cor)
f_combos <- map(combos, ~ f_cors[, colnames(f_cors) == .x])
if("tbl_df" %in% class(f_cors)) {
Expand All @@ -138,7 +138,7 @@ correlate.tbl_sql <- function(x, y = NULL,
f_combos <- map(f_combos, ~ ifelse(!is.null(nrow(.x)), NA, .x))
}
f_combos <- map_dbl(f_combos, ~ifelse(is.null(.x), NA, .x))

cor_tbl <- cols
cor_tbl$cor <- f_combos
cor_tbl$xn <- map_int(
Expand All @@ -150,7 +150,7 @@ correlate.tbl_sql <- function(x, y = NULL,
~which(.x == col_names)
)
cors_matrix <- matrix(
ncol = length(col_names),
ncol = length(col_names),
nrow = length(col_names)
)
for(i in seq_along(combos)){
Expand All @@ -159,7 +159,7 @@ correlate.tbl_sql <- function(x, y = NULL,
colnames(cors_matrix) <- col_names
df_cor <- as_cordf(cors_matrix)
}

if(!is.null(df_cor)){
class(df_cor) <- c("cor_df", class(df_cor))
if (!quiet)
Expand Down
4 changes: 2 additions & 2 deletions R/dataframe.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#' @export
fashion.data.frame <- function(x, decimals = 2, leading_zeros = FALSE, na_print = "") {
x <- purrr::map(x ,
x <- purrr::map(x ,
fashion,
decimals = decimals,
leading_zeros = leading_zeros,
na_print = na_print
)
)
noquote(as.data.frame(x))
}
Loading