Skip to content

Commit

Permalink
Merge pull request #93 from krlmlr/f-ws-eol
Browse files Browse the repository at this point in the history
Trim whitespace at EOL
  • Loading branch information
juliasilge authored Jun 14, 2020
2 parents 59af5de + 40b09ac commit 9b25cf4
Show file tree
Hide file tree
Showing 16 changed files with 146 additions and 143 deletions.
70 changes: 35 additions & 35 deletions R/cor_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#' @export
as_matrix.cor_df <- function(x, diagonal) {

# Separate rownames
row_name <- x$rowname
x <- x[, colnames(x) != "rowname"]
Expand All @@ -19,18 +19,18 @@ as_matrix.cor_df <- function(x, diagonal) {

#' @export
shave.cor_df <- function(x, upper = TRUE) {

# Separate rownames
row_name <- x$rowname
x <- x[, colnames(x) != "rowname"]

# Remove upper matrix
if (upper) {
x[upper.tri(x)] <- NA
} else {
x[lower.tri(x)] <- NA
}

# Reappend rownames and class
x <- first_col(x, row_name)
class(x) <- c("cor_df", class(x))
Expand All @@ -39,20 +39,20 @@ shave.cor_df <- function(x, upper = TRUE) {

#' @export
rearrange.cor_df <- function(x, method = "PCA", absolute = TRUE) {

# Convert to original matrix
m <- as_matrix(x, diagonal = 1)
if (absolute) abs(m)

if (absolute) abs(m)

if (method %in% c("BEA", "BEA_TSP", "PCA", "PCA_angle")) {
ord <- seriation::seriate(m, method = method)
} else {
ord <- seriation::seriate(dist(m), method = method)
}

ord <- seriation::get_order(ord)

# Arrange and return matrix
# "c(1, 1 + ..." to handle rowname column
x <- x[ord, c(1, 1 + ord)]
Expand All @@ -68,7 +68,7 @@ focus_.cor_df <- function(x, ..., .dots = NULL, mirror = FALSE) {
vars <- enquos(...)
row_name <- x$rowname
if(length(vars) > 0) {
x <- dplyr::select(x, !!! vars)
x <- dplyr::select(x, !!! vars)
} else {
x <- dplyr::select(x, .dots)
}
Expand All @@ -80,7 +80,7 @@ focus_.cor_df <- function(x, ..., .dots = NULL, mirror = FALSE) {
} else {
x <- first_col(x, row_name)
}

# Exclude these or others from the rows
vars <- x$rowname %in% vars
if (mirror) {
Expand All @@ -94,15 +94,15 @@ focus_.cor_df <- function(x, ..., .dots = NULL, mirror = FALSE) {

#' @export
focus_if.cor_df <- function(x, .predicate, ..., mirror = FALSE) {

# Identify which variables to keep
to_keep <- map_lgl(
x[, colnames(x) != "rowname"],
x[, colnames(x) != "rowname"],
.predicate, ...
)

to_keep <- names(to_keep)[!is.na(to_keep) & to_keep]

if (!length(to_keep)) {
stop("No variables were TRUE given the function.")
}
Expand All @@ -119,18 +119,18 @@ rplot.cor_df <- function(rdf,
colours = c("indianred2", "white", "skyblue1"),
print_cor = FALSE,
colors) {

if (!missing(colors))
colours <- colors

# Store order for factoring the variables
row_order <- rdf$rowname

# Convert data to relevant format for plotting
pd <- stretch(rdf, na.rm = TRUE)
pd <- stretch(rdf, na.rm = TRUE)
pd$size = abs(pd$r)
pd$label = fashion(pd$r)

plot_ <- list(
# Geoms
geom_point(shape = shape),
Expand All @@ -143,12 +143,12 @@ rplot.cor_df <- function(rdf,
if (legend) labs(colour = NULL),
if (!legend) theme(legend.position = "none")
)

ggplot(pd, aes_string(x = "x", y = "y", color = "r",
size = "size", alpha = "size",
label = "label")) +
plot_

# # plot
# ggplot(aes_string(x = "x", y = "y", color = "r",
# size = "size", alpha = "size",
Expand All @@ -157,15 +157,15 @@ rplot.cor_df <- function(rdf,
# scale_colour_gradientn(limits = c(-1, 1), colors = colours) +
# labs(x = "", y ="") +
# theme_classic()
#
#
# if (print_cor) {
# p <- p + geom_text(color = "black", size = 3, show.legend = FALSE)
# }
#
#
# if (!legend) {
# p <- p + theme(legend.position = "none")
# }
#
#
# p
}

Expand All @@ -177,31 +177,31 @@ network_plot.cor_df <- function(rdf,
repel = TRUE,
curved = TRUE,
colors) {

if (min_cor < 0 || min_cor > 1) {
stop ("min_cor must be a value ranging from zero to one.")
}

if (!missing(colors))
colours <- colors

rdf <- as_matrix(rdf, diagonal = 1)
distance <- sign(rdf) * (1 - abs(rdf))

# Use multidimensional Scaling to obtain x and y coordinates for points.
points <- data.frame(stats::cmdscale(abs(distance)))
colnames(points) <- c("x", "y")
points$id <- rownames(points)

# Create a proximity matrix of the paths to be plotted.
proximity <- abs(rdf)
proximity[upper.tri(proximity)] <- NA
diag(proximity) <- NA
proximity[proximity < min_cor] <- NA

# Produce a data frame of data needed for plotting the paths.
n_paths <- sum(!is.na(proximity))
paths <- data.frame(matrix(nrow = n_paths, ncol = 6))
paths <- data.frame(matrix(nrow = n_paths, ncol = 6))
colnames(paths) <- c("x", "y", "xend", "yend", "proximity", "sign")
path <- 1
for(row in 1:nrow(proximity)) {
Expand All @@ -218,17 +218,17 @@ network_plot.cor_df <- function(rdf,
}
}
}

plot_ <- list(
# For plotting paths
if (curved) geom_curve(data = paths,
aes(x = x, y = y, xend = xend, yend = yend,
alpha = proximity, size = proximity,
colour = proximity*sign)),
colour = proximity*sign)),
if (!curved) geom_segment(data = paths,
aes(x = x, y = y, xend = xend, yend = yend,
alpha = proximity, size = proximity,
colour = proximity*sign)),
colour = proximity*sign)),
scale_alpha(limits = c(0, 1)),
scale_size(limits = c(0, 1)),
scale_colour_gradientn(limits = c(-1, 1), colors = colours),
Expand Down
66 changes: 33 additions & 33 deletions R/correlate.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#' Correlation Data Frame
#'
#'
#' An implementation of stats::cor(), which returns a correlation data frame
#' rather than a matrix. See details below. Additional adjustment include the
#' use of pairwise deletion by default.
#'
#'
#' \itemize{
#' This function returns a correlation matrix as a correlation data frame in
#' the following format:
Expand All @@ -13,7 +13,7 @@
#' \item Standardized variances (the matrix diagonal) set to missing values by
#' default (\code{NA}) so they can be ignored in calculations.
#' }
#'
#'
#' @inheritParams stats::cor
#' @inheritParams as_cordf
#' @param quiet Set as TRUE to suppress message about `method` and `use`
Expand All @@ -24,24 +24,24 @@
#' \dontrun{
#' correlate(iris)
#' }
#'
#'
#' correlate(iris[-5])
#'
#'
#' correlate(mtcars)
#'
#'
#' \dontrun{
#'
#'
#' # Also supports DB backend and collects results into memory
#'
#'
#' library(sparklyr)
#' sc <- spark_connect(master = "local")
#' mtcars_tbl <- copy_to(sc, mtcars)
#' mtcars_tbl %>%
#' mtcars_tbl %>%
#' correlate(use = "pairwise.complete.obs", method = "spearman")
#' spark_disconnect(sc)
#'
#'
#' }
#'
#'
correlate <- function(x, y = NULL,
use = "pairwise.complete.obs",
method = "pearson",
Expand All @@ -60,7 +60,7 @@ correlate.default <- function(x, y = NULL,
if (!quiet)
message("\nCorrelation method: '", method, "'",
"\nMissing treated using: '", use, "'\n")

as_cordf(x, diagonal = diagonal)
}

Expand All @@ -75,61 +75,61 @@ correlate.tbl_sql <- function(x, y = NULL,
if(!is.null(y)) stop("y is not supported for tables with a SQL back-end")
if(!is.na(diagonal)) stop("Only NA's are supported for same field correlations")
df_cor <- NULL

if("tbl_spark" %in% class(x)){

if(!method %in% c("pearson", "spearman"))
stop("Only pearson or spearman methods are currently supported")

df_cor <- as_cordf(sparklyr::ml_corr(x, method = method))
}

if(is.null(df_cor)){

if(method != "pearson") stop("Only 'pearson' method is currently supported")

col_names <- colnames(x)

cols <- map_dfr(
col_names,
~ tibble(
x = .x,
x = .x,
y = col_names
))
combos <- map_chr(transpose(cols), ~ paste0(sort(c(.x$x, .x$y)), collapse = "_"))
cols$combos <- combos
unique_combos <- unique(combos)

f_cols <- map_dfr(unique_combos, ~ head(cols[cols$combos == .x, ], 1))

if(!all(unique(f_cols$x) == col_names)) stop("Not all variable combinations are present")
if(!all(unique(f_cols$y) == col_names)) stop("Not all variable combinations are present")

f_cols <- f_cols[f_cols$x != f_cols$y, ]

mnprod <- map(transpose(f_cols), ~expr(sum(!! sym(.x$x) * !! sym(.x$y), na.rm = TRUE)))
mnprod <- set_names(mnprod, f_cols$combos)

mnsum <- map(col_names, ~expr(sum(!! sym(.x), na.rm = TRUE)))
mnsum <- set_names(mnsum, paste0(col_names, "_sum"))

mntwo <- map(col_names, ~expr(sum(!! sym(.x) * !! sym(.x), na.rm = TRUE)))
mntwo <- set_names(mntwo, paste0(col_names, "_two"))
obs <- set_names(list(expr(n())), "obs")
db_totals <- collect(summarise(x, !!! c(mnsum, mntwo, mnprod, obs)))

f_cols$x_sum = paste0(f_cols$x, "_sum")
f_cols$y_sum = paste0(f_cols$y, "_sum")
f_cols$x_two = paste0(f_cols$x, "_two")
f_cols$y_two = paste0(f_cols$y, "_two")

l_cols <- transpose(f_cols)

top <- map(l_cols, ~ expr((obs * !! sym(.x$combos)) - (!! sym(.x$x_sum) * !! sym(.x$y_sum)) ))
bottom <- map(l_cols, ~ expr((sqrt(((obs * !! sym(.x$x_two)) - (!! sym(.x$x_sum) * !! sym(.x$x_sum))) * ((obs * !! sym(.x$y_two)) - (!! sym(.x$y_sum) * !! sym(.x$y_sum)))))))
f_cor <- map(seq_along(top), ~expr(!! top[[.x]] / !! bottom[[.x]]))
f_cor <- set_names(f_cor, f_cols$combos)

f_cors <- summarise(db_totals, !!! f_cor)
f_combos <- map(combos, ~ f_cors[, colnames(f_cors) == .x])
if("tbl_df" %in% class(f_cors)) {
Expand All @@ -138,7 +138,7 @@ correlate.tbl_sql <- function(x, y = NULL,
f_combos <- map(f_combos, ~ ifelse(!is.null(nrow(.x)), NA, .x))
}
f_combos <- map_dbl(f_combos, ~ifelse(is.null(.x), NA, .x))

cor_tbl <- cols
cor_tbl$cor <- f_combos
cor_tbl$xn <- map_int(
Expand All @@ -150,7 +150,7 @@ correlate.tbl_sql <- function(x, y = NULL,
~which(.x == col_names)
)
cors_matrix <- matrix(
ncol = length(col_names),
ncol = length(col_names),
nrow = length(col_names)
)
for(i in seq_along(combos)){
Expand All @@ -159,7 +159,7 @@ correlate.tbl_sql <- function(x, y = NULL,
colnames(cors_matrix) <- col_names
df_cor <- as_cordf(cors_matrix)
}

if(!is.null(df_cor)){
class(df_cor) <- c("cor_df", class(df_cor))
if (!quiet)
Expand Down
4 changes: 2 additions & 2 deletions R/dataframe.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#' @export
fashion.data.frame <- function(x, decimals = 2, leading_zeros = FALSE, na_print = "") {
x <- purrr::map(x ,
x <- purrr::map(x ,
fashion,
decimals = decimals,
leading_zeros = leading_zeros,
na_print = na_print
)
)
noquote(as.data.frame(x))
}
Loading

0 comments on commit 9b25cf4

Please sign in to comment.