Skip to content

Commit

Permalink
really fix fitting empty sets. closes #23
Browse files Browse the repository at this point in the history
  • Loading branch information
Johan Larsson committed Mar 3, 2018
1 parent 6f0540a commit e78461f
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 63 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ of `plot.euler()`. This change means that functions such as
`gridExtra::grid.arrange()` now work as one would intuit on the objects
produced by `plot.euler()`.
* Fitting and plotting euler diagrams with empty sets is now allowed (#23).
Empty sets in the input will be returned as `NA` in the resulting
`data.frame` of ellipses.

## Bug fixes

Expand Down
105 changes: 60 additions & 45 deletions R/euler.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ euler.default <- function(
combo_names <- strsplit(names(combinations), split = "&", fixed = TRUE)
setnames <- unique(unlist(combo_names, use.names = FALSE))

# setup preliminary return pars

n <- length(setnames)
id <- bit_indexr(n)
N <- NROW(id)
Expand All @@ -159,34 +161,52 @@ euler.default <- function(
}
}

if (n > 1L) {
# Decompose or collect set volumes depending on input
if (match.arg(input) == "disjoint") {
areas_disjoint <- areas
areas[] <- 0
for (i in rev(seq_along(areas))) {
prev_areas <- rowSums(id[, id[i, ], drop = FALSE]) == sum(id[i, ])
areas[i] <- sum(areas_disjoint[prev_areas])
}
} else if (match.arg(input) == "union") {
areas_disjoint <- double(length(areas))
for (i in rev(seq_along(areas))) {
prev_areas <- rowSums(id[, id[i, ], drop = FALSE]) == sum(id[i, ])
areas_disjoint[i] <- areas[i] - sum(areas_disjoint[prev_areas])
}
if (any(areas_disjoint < 0))
stop("Check your set configuration. Some disjoint areas are negative.")
# Decompose or collect set volumes depending on input
if (match.arg(input) == "disjoint") {
areas_disjoint <- areas
areas[] <- 0
for (i in rev(seq_along(areas))) {
prev_areas <- rowSums(id[, id[i, ], drop = FALSE]) == sum(id[i, ])
areas[i] <- sum(areas_disjoint[prev_areas])
}
} else if (match.arg(input) == "union") {
areas_disjoint <- double(length(areas))
for (i in rev(seq_along(areas))) {
prev_areas <- rowSums(id[, id[i, ], drop = FALSE]) == sum(id[i, ])
areas_disjoint[i] <- areas[i] - sum(areas_disjoint[prev_areas])
}
if (any(areas_disjoint < 0))
stop("Check your set configuration. Some disjoint areas are negative.")
}

# setup return object
fpar <- as.data.frame(matrix(
NA,
ncol = 5L,
nrow = n,
dimnames = list(setnames, c("h", "k", "a", "b", "phi"))
))

# setup return values
orig <- rep.int(0, N)
fit <- rep.int(0, N)
names(orig) <- names(fit) <-
apply(id, 1L, function(x) paste0(setnames[x], collapse = "&"))

# find empty sets
empty_sets <- areas[seq_len(n)] < sqrt(.Machine$double.eps)
empty_subsets <- rowSums(id[, empty_sets, drop = FALSE]) > 0

id <- id[!empty_subsets, !empty_sets, drop = FALSE]
N <- NROW(id)
n <- sum(!empty_sets)
areas <- areas[!empty_subsets]
areas_disjoint <- areas_disjoint[!empty_subsets]

if (n > 1L) {
if (all(areas == 0)) {
# all sets are zero
fpar <- matrix(data = rep.int(0, 5L*n),
ncol = 5L,
dimnames = list(setnames, c("h", "k", "a", "b", "phi")))
regionError <- diagError <- stress <- 0
orig <- fit <- areas
names(orig) <- names(fit) <-
apply(id, 1L, function(x) paste0(setnames[x], collapse = "&"))
fpar[] <- NA
} else {
id_sums <- rowSums(id)
ones <- id_sums == 1L
Expand Down Expand Up @@ -251,7 +271,7 @@ euler.default <- function(
upr <- c(rep.int(bnd, 4L), 2*pi)
}

orig <- areas_disjoint
orig[!empty_subsets] <- areas_disjoint

# Try to find a solution using nlm() first (faster)
# TODO: Allow user options here?
Expand All @@ -267,7 +287,7 @@ euler.default <- function(
data = nlm_solution,
ncol = if (circle) 3L else 5L,
dimnames = list(
setnames,
setnames[!empty_sets],
if (circle) c("h", "k", "r") else c("h", "k", "a", "b", "phi")
),
byrow = TRUE
Expand All @@ -280,7 +300,7 @@ euler.default <- function(

nlm_pars <- compress_layout(normalize_pars(tpar), id, nlm_fit)

nlm_diagError <- diagError(nlm_fit, orig)
nlm_diagError <- diagError(nlm_fit, orig[!empty_subsets])

# If inadequate solution, try with a second optimizer (slower, better)
if (!circle && control$extraopt &&
Expand Down Expand Up @@ -333,53 +353,48 @@ euler.default <- function(
# Check for the best solution
if (last_ditch_diagError < nlm_diagError) {
final_par <- last_ditch_effort
fit <- last_ditch_fit
fit[!empty_subsets] <- last_ditch_fit
} else {
final_par <- nlm_solution
fit <- nlm_fit
fit[!empty_subsets] <- nlm_fit
}
} else {
final_par <- nlm_solution
fit <- nlm_fit
fit[!empty_subsets] <- nlm_fit
}

names(orig) <- names(fit) <-
apply(id, 1L, function(x) paste0(setnames[x], collapse = "&"))
# names(orig) <- names(fit) <-
# apply(id, 1L, function(x) paste0(setnames[x], collapse = "&"))

regionError <- regionError(fit, orig)
diagError <- diagError(regionError = regionError)
stress <- stress(orig, fit)

fpar <- matrix(data = final_par,
temp <- matrix(data = final_par,
ncol = if (circle) 3L else 5L,
byrow = TRUE)

if (circle)
fpar <- cbind(fpar, fpar[, 3L], 0)

dimnames(fpar) <- list(setnames, c("h", "k", "a", "b", "phi"))
temp <- cbind(temp, temp[, 3L], 0)

# Normalize semiaxes and rotation
fpar <- normalize_pars(fpar)
temp <- normalize_pars(temp)

# Find disjoint clusters and compress the layout
fpar <- compress_layout(fpar, id, fit)
temp <- compress_layout(temp, id, fit[!empty_subsets])

# Center the solution on the coordinate plane
fpar <- center_layout(fpar)
fpar[!empty_sets, ] <- center_layout(temp)
}
} else {
# One set
fpar <- matrix(data = c(0, 0, sqrt(areas/pi), sqrt(areas/pi), 0),
ncol = 5L,
dimnames = list(setnames, c("h", "k", "a", "b", "phi")))
fpar[!empty_sets, ] <- c(0, 0, sqrt(areas/pi), sqrt(areas/pi), 0)
regionError <- diagError <- stress <- 0
orig <- fit <- areas
names(orig) <- names(fit) <- setnames
orig[!empty_subsets] <- fit[!empty_subsets] <- areas
}

# Return eulerr structure
structure(list(ellipses = as.data.frame(fpar),
structure(list(ellipses = fpar,
original.values = orig,
fitted.values = fit,
residuals = orig - fit,
Expand Down
50 changes: 32 additions & 18 deletions R/plot.euler.R
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ plot.euler <- function(x,
rot = opar$labels$rot)
}

labels$rot <- rep_len(labels$rot, n_e)
labels$gp <- setup_gpar(list(col = opar$labels$col,
alpha = opar$labels$alpha,
fontsize = opar$labels$fontsize,
Expand All @@ -290,6 +291,7 @@ plot.euler <- function(x,
} else {
quantities <- list(labels = quantities, rot = opar$quantities$rot)
}
quantities$rot <- rep_len(quantities$rot, n_id)

quantities$gp <- setup_gpar(list(col = opar$quantities$col,
alpha = opar$quantities$alpha,
Expand All @@ -299,7 +301,7 @@ plot.euler <- function(x,
lineheight = opar$quantities$lineheight,
font = opar$quantities$font),
quantities,
n_e)
n_id)
} else {
quantities <- NULL
}
Expand Down Expand Up @@ -632,14 +634,20 @@ setup_geometry <- function(x,
n,
id) {
dd <- x$ellipses
orig <- x$original.values
fitted <- x$fitted.values
empty_sets <- is.na(dd[, 1L])
empty_subsets <- rowSums(id[, empty_sets, drop = FALSE]) > 0

orig <- x$original.values[!empty_subsets]
fitted <- x$fitted.values[!empty_subsets]
dd <- dd[!empty_sets, , drop = FALSE]

do_fills <- !is.null(fills)
do_edges <- !is.null(edges)
do_labels <- !is.null(labels)
do_quantities <- !is.null(quantities)

id <- id[!empty_subsets, !empty_sets, drop = FALSE]

h <- dd$h
k <- dd$k
a <- dd$a
Expand Down Expand Up @@ -697,15 +705,14 @@ setup_geometry <- function(x,
}

if (do_labels || do_quantities) {
void_sets <- apply(id, 2, function(i) all(fitted[i] == 0))
singles <- rowSums(id) == 1
empty <- abs(fitted) < sqrt(.Machine$double.eps)

centers <- cbind(t(locate_centers(h, k, a, b, phi, fitted)), seq_len(n_id))
rownames(centers) <- names(orig)

if (do_labels) {
labels <- list(labels = labels$labels)
labels <- list(labels = labels$labels[which(!empty_sets)])
center_labels <- labels$labels[!is.nan(centers[singles, 1L])]
}

Expand All @@ -725,21 +732,24 @@ setup_geometry <- function(x,
centers[!is.nan(centers[, 1L]) & !singles & droprows, , drop = FALSE]
quantities_centers <- rbind(quantities_centers, labels_centers)
if (!is.null(quantities$labels))
quantities <- list(labels = quantities$labels[quantities_centers[, 3L]])
quantities <- list(
labels =
quantities$labels[which(!empty_subsets)][quantities_centers[, 3L]]
)
else
quantities <- list(labels = orig[quantities_centers[, 3L]])
quantities$x <- quantities_centers[, 1L]
quantities$y <- quantities_centers[, 2L]
quantities$x[void_sets] <- NA
quantities$y[void_sets] <- NA
# quantities$x[void_sets] <- NA
# quantities$y[void_sets] <- NA
}

if (do_labels) {
labels$x <- labels_centers[, 1L]
labels$y <- labels_centers[, 2L]
labels$labels <- center_labels
labels$x[void_sets] <- NA
labels$y[void_sets] <- NA
# labels$x[void_sets] <- NA
# labels$y[void_sets] <- NA
}
}

Expand All @@ -750,6 +760,8 @@ setup_geometry <- function(x,
edges = edges,
labels = labels,
quantities = quantities,
empty_sets = empty_sets,
empty_subsets = empty_subsets,
xlim = limits$xlim,
ylim = limits$ylim)
}
Expand Down Expand Up @@ -777,13 +789,15 @@ setup_grobs <- function(x,
data_fills <- x$fills
data_quantities <- x$quantities
fitted <- x$fitted.values
empty_sets <- x$empty_sets
empty_subsets <- x$empty_subsets

do_labels <- !is.null(data_labels)
do_edges <- !is.null(data_edges)
do_fills <- !is.null(data_fills)
do_quantities <- !is.null(data_quantities)

n_e <- nrow(x$ellipses)
n_e <- NROW(x$ellipses)
n_id <- 2L^n_e - 1L
id <- bit_indexr(n_e)

Expand All @@ -795,7 +809,7 @@ setup_grobs <- function(x,
id.lengths = data_edges$id.lengths,
default.units = "native",
name = "edges.grob",
gp = edges$gp)
gp = edges$gp[which(!empty_sets)])
}

# fills
Expand All @@ -806,7 +820,7 @@ setup_grobs <- function(x,
data_fills[[1]]$y,
default.units = "native",
name = "fills.grob",
gp = fills$gp[1L]
gp = fills$gp[which(!empty_subsets)[1L]]
))
} else {
fills_grob <- vector("list", n_id)
Expand Down Expand Up @@ -838,7 +852,7 @@ setup_grobs <- function(x,
id.lengths = data_fills[[i]]$id.lengths,
default.units = "native",
name = paste0("fills.grob.", i),
gp = fills$gp[fill_id[i]]
gp = fills$gp[which(!empty_subsets)][fill_id[i]]
)
}
fills_grob <- do.call(grid::gList, fills_grob)
Expand All @@ -856,11 +870,11 @@ setup_grobs <- function(x,
label = data_quantities$labels,
x = data_quantities$x,
y = data_quantities$y,
rot = quantities$rot,
rot = quantities$rot[which(!empty_subsets)],
vjust = ifelse(lab_id & do_labels, 1, 0.5),
name = "quantities.grob",
default.units = "native",
gp = quantities$gp
gp = quantities$gp[which(!empty_subsets)]
)
}

Expand All @@ -870,11 +884,11 @@ setup_grobs <- function(x,
label = data_labels$labels,
x = data_labels$x,
y = data_labels$y,
rot = labels$rot,
rot = labels$rot[which(!empty_sets)],
vjust = if (do_quantities) -0.5 else 0.5,
name = "labels.grob",
default.units = "native",
gp = labels$gp
gp = labels$gp[!empty_sets]
)
}

Expand Down

0 comments on commit e78461f

Please sign in to comment.