Can visualization alleviate dichotomous thinking? Effects of visual representations on the cliff effect
Jouni Helske, Satu Helske, Matthew Cooper, Anders Ynnerman, Lonni Besançon 13/4/2020
This repository contains data and scripts for reproducing the analysis of the paper Can visualization alleviate dichotomous thinking? Effects of visual representations on the cliff effect by Jouni Helske, Satu Helske, Matthew Cooper, Anders Ynnerman, and Lonni Besançon.
This Rmd file contains full scripts (and extra stuff) to reproduce figures in the paper.
The raw data for both experiments can be found in the folders experiment1/data/ and experiment2/data/, respectively, which also contain the R data frames used in the analysis (exp1_data.rds and exp2_data.rds). The web pages for the surveys are in the folder web, with some screenshots in the folder screenshots.
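For convenience, the preprocessed data frames mentioned above can also be loaded directly; a minimal sketch, assuming the working directory is the repository root:
# load the preprocessed data frames (paths as described above)
exp1_data <- readRDS("experiment1/data/exp1_data.rds")
exp2_data <- readRDS("experiment2/data/exp2_data.rds")
str(exp1_data)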
First, we load some packages:
suppressPackageStartupMessages({
library(brms)
library(modelr)
library(ggplot2)
library(dplyr)
library(jsonlite)
library(loo)
library(ggthemes)
})
## Warning: package 'brms' was built under R version 4.0.3
## Warning: package 'Rcpp' was built under R version 4.0.3
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'dplyr' was built under R version 4.0.4
## Warning: package 'jsonlite' was built under R version 4.0.3
## Warning: package 'loo' was built under R version 4.0.3
## Warning: package 'ggthemes' was built under R version 4.0.3
Then we transform the raw data into a suitable format for analysis:
path <- "experiment1/data"
answers <- list.files(path, pattern="answers", full.names = TRUE)
# fetch number of participants
n <- length(answers)
# create a data frame for the results
data_raw <- data.frame(id = rep(1:n, each = 32), viz = NA, replication = NA, value = NA,
expertise = NA, degree = NA, age = NA, experience = NA, tools = NA)
# read in answers; not the optimal way, but it will do
for(i in 1:n){
x <- strsplit(fromJSON(answers[i]), ",")
dem <- fromJSON(paste0(path, "/demography", x[[1]][1], ".txt"))
for(j in 1:32) {
data_raw[32*(i-1) + j, c("id", "viz", "replication", "value")] <- x[[j]]
data_raw[32*(i-1) + j, c("expertise", "degree", "age", "experience", "tools")] <-
dem[c("expertise", "level", "age", "experience", "tools")]
}
}
saveRDS(data_raw, file = "experiment1/data/data_raw.rds")
# remove person who didn't answer reasonably on the demography part
# Degree is None and more importantly expertise is 1..?
data <- data_raw[data_raw$degree != "None",]
# true p-values
true_p <- c(0.001, 0.01, 0.04, 0.05, 0.06, 0.1, 0.5, 0.8)
# convert to factors and numeric
data <- data %>% mutate(n = factor(ifelse(as.numeric(id) %% 8 < 4, 50, 200)),
id = factor(id),
viz = relevel(factor(viz, labels = c("CI", "gradient", "p", "violin")), "p"),
replication = as.numeric(replication),
value = as.numeric(value),
p = true_p[replication],
true_p = factor(p), # for monotonic but non-linear effect on confidence
confidence = (value - 1) / 99,
expertise = factor(expertise)) %>% arrange(id, viz)
# Classify expertise
data$expertise <- recode_factor(data$expertise,
"Statistics" = "Stats/ML",
"statistics" = "Stats/ML",
"statistics/machine learning" = "Stats/ML",
"Analytics" = "Stats/ML",
"Statistics/Medicine" = "Stats/ML",
"Data science" = "Stats/ML",
"Biostatistics" = "Stats/ML",
"IT & Business Data Science" = "Stats/ML",
"methods" = "Stats/ML",
"AI" = "Stats/ML",
"Neuroscience and Statistics" = "Stats/ML",
"Computer vision" = "Stats/ML",
"Psychometric" = "Stats/ML",
"HCI, Visualization" = "VIS/HCI",
"HCI/Visualization" = "VIS/HCI",
"interaction design and evaluation" = "VIS/HCI",
"Human-Computer Interaction" = "VIS/HCI",
"HCI" = "VIS/HCI",
"Vis" = "VIS/HCI",
"Visualization" = "VIS/HCI",
"Data Visualization" = "VIS/HCI",
"CS, Visualization, HCI" = "VIS/HCI",
"Infovis" = "VIS/HCI",
"Visualization / Computer Science" = "VIS/HCI",
"Virtual Reality" = "VIS/HCI",
"Visualisation" = "VIS/HCI",
"research in HCI" = "VIS/HCI",
"Computer science" = "VIS/HCI",
"Computer Science" = "VIS/HCI",
"Social science" = "Social science and humanities",
"Political science" = "Social science and humanities",
"sociology" = "Social science and humanities",
"Sociology" = "Social science and humanities",
"Analytical Sociology" = "Social science and humanities",
"Education research" = "Social science and humanities",
"Economics" = "Social science and humanities",
"market research" = "Social science and humanities",
"Politics" = "Social science and humanities",
"Finance" = "Social science and humanities",
"Linguistics" = "Social science and humanities",
"Education Poliy" = "Social science and humanities",
"Political Science" = "Social science and humanities",
"Psychology" = "Social science and humanities",
"psychology" = "Social science and humanities",
"segregation" = "Social science and humanities",
"Philosophy" = "Social science and humanities",
"organizational science" = "Social science and humanities",
"Strategic Management" = "Social science and humanities",
"network analysis" = "Social science and humanities",
"CSS" = "Social science and humanities",
"Management" = "Social science and humanities",
"Animal science" = "Physical and life sciences",
"Biology" = "Physical and life sciences",
"Botany" = "Physical and life sciences",
"ecology" = "Physical and life sciences",
"Zoology" = "Physical and life sciences",
"Physics" = "Physical and life sciences",
"cognitive neuroscience" = "Physical and life sciences",
"Neuroscience" = "Physical and life sciences",
"neuroscience/motor control" = "Physical and life sciences",
"Biomechanics" = "Physical and life sciences",
"Neurocognitive Psychology" = "Physical and life sciences",
"pharma" = "Physical and life sciences",
"Public health" = "Physical and life sciences",
"neurobiology" = "Physical and life sciences",
"medicine" = "Physical and life sciences",
"Molcular Biology" = "Physical and life sciences",
"Wind Energy" = "Physical and life sciences",
"Mathematical Biology" = "Physical and life sciences",
"Pain" = "Physical and life sciences",
"genomics" = "Physical and life sciences",
"Medicine" = "Physical and life sciences",
"Water engineering" = "Physical and life sciences")
data$expertise <- relevel(data$expertise, "Stats/ML")
Let’s first look at some descriptive statistics:
ids <- which(!duplicated(data$id))
barplot(table(data$expertise[ids]))
barplot(table(data$degree[ids]))
hist(as.numeric(data$age[ids]))
Let us now focus on the cliff effect, defined as the difference in confidence between p-value = 0.04 and p-value = 0.06:
data %>% group_by(id, viz) %>%
summarize(difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
group_by(viz) %>%
summarise(
mean = mean(difference),
median = median(difference),
sd = sd(difference),
se = sd(difference) / sqrt(length(difference)),
"2.5%" = quantile(difference, 0.025),
"97.5%" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'id' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 7
## viz mean median sd se `2.5%` `97.5%`
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 p 0.192 0.111 0.274 0.0257 -0.188 0.721
## 2 CI 0.232 0.172 0.250 0.0235 -0.0525 0.842
## 3 gradient 0.102 0.0808 0.239 0.0225 -0.366 0.741
## 4 violin 0.126 0.101 0.204 0.0192 -0.162 0.616
data %>% group_by(id, viz) %>%
summarize(difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
ggplot(aes(x = viz, y = difference)) +
geom_violin() +
geom_point(alpha = 0.5, position = position_jitter(0.1)) +
scale_y_continuous("Difference in confidence when p-value is 0.06 vs 0.04") +
scale_x_discrete("Representation") +
theme_classic()
## `summarise()` regrouping output by 'id' (override with `.groups` argument)
The cliff effect seems to be largest when the information is presented as a traditional CI or a p-value, which behave similarly. The gradient CI and violin CI plots are fairly close to each other.
Now the same, but subgrouped by sample size:
data %>% group_by(id, viz, n) %>%
summarize(difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
group_by(viz, n) %>%
summarise(
mean = mean(difference),
median = median(difference),
sd = sd(difference),
se = sd(difference) / sqrt(length(difference)),
"2.5%" = quantile(difference, 0.025),
"97.5%" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'id', 'viz' (override with `.groups` argument)
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## # A tibble: 8 x 8
## # Groups: viz [4]
## viz n mean median sd se `2.5%` `97.5%`
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 p 50 0.215 0.111 0.266 0.0359 -0.0535 0.687
## 2 p 200 0.169 0.116 0.280 0.0368 -0.218 0.777
## 3 CI 50 0.207 0.162 0.187 0.0252 -0.0263 0.638
## 4 CI 200 0.254 0.182 0.297 0.0390 -0.101 0.905
## 5 gradient 50 0.117 0.0808 0.218 0.0294 -0.220 0.669
## 6 gradient 200 0.0888 0.0556 0.259 0.0340 -0.369 0.729
## 7 violin 50 0.116 0.121 0.167 0.0225 -0.162 0.504
## 8 violin 200 0.136 0.0758 0.235 0.0308 -0.123 0.686
and expertise:
data %>% group_by(id, viz, expertise) %>%
summarize(difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
group_by(viz, expertise) %>%
summarise(
mean = mean(difference),
median = median(difference),
sd = sd(difference),
se = sd(difference) / sqrt(length(difference)),
"2.5%" = quantile(difference, 0.025),
"97.5%" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'id', 'viz' (override with `.groups` argument)
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## # A tibble: 16 x 8
## # Groups: viz [4]
## viz expertise mean median sd se `2.5%` `97.5%`
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 p Stats/ML 0.256 0.141 0.303 0.0661 -0.0758 0.828
## 2 p VIS/HCI 0.144 0.111 0.309 0.0531 -0.557 0.724
## 3 p Social science and huma~ 0.193 0.126 0.203 0.0359 -0.0427 0.573
## 4 p Physical and life scien~ 0.199 0.0657 0.278 0.0546 -0.140 0.751
## 5 CI Stats/ML 0.303 0.212 0.256 0.0559 0.00505 0.803
## 6 CI VIS/HCI 0.194 0.167 0.252 0.0432 -0.184 0.743
## 7 CI Social science and huma~ 0.247 0.162 0.247 0.0436 -0.0169 0.908
## 8 CI Physical and life scien~ 0.204 0.116 0.245 0.0481 -0.0417 0.803
## 9 gradient Stats/ML 0.160 0.101 0.193 0.0421 -0.0556 0.591
## 10 gradient VIS/HCI 0.0401 0.0354 0.301 0.0516 -0.515 0.794
## 11 gradient Social science and huma~ 0.0859 0.0707 0.221 0.0391 -0.288 0.549
## 12 gradient Physical and life scien~ 0.158 0.106 0.187 0.0368 -0.0631 0.600
## 13 violin Stats/ML 0.189 0.131 0.234 0.0511 -0.0455 0.707
## 14 violin VIS/HCI 0.0484 0.0303 0.133 0.0229 -0.183 0.316
## 15 violin Social science and huma~ 0.145 0.116 0.216 0.0382 -0.147 0.616
## 16 violin Physical and life scien~ 0.155 0.111 0.219 0.0430 -0.143 0.617
In terms of sample size, there do not seem to be clear differences in the cliff effect, especially when considering the medians. In terms of expertise, there seem to be some differences, especially in variability (most notably the violin plot for VIS/HCI), but these are likely due to a few very extreme cases:
data %>% group_by(id, viz, expertise) %>%
summarize(
difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
ggplot(aes(x=viz, y = difference)) + geom_violin() + theme_classic() +
scale_y_continuous("Difference in confidence when p-value is 0.04 vs 0.06") +
scale_x_discrete("Representation") +
geom_point(aes(colour = expertise), position=position_jitter(0.1))
## `summarise()` regrouping output by 'id', 'viz' (override with `.groups` argument)
Let’s check how many extreme answers (full or zero confidence) there are in the different groups:
data %>% group_by(id, viz, n) %>%
mutate(extreme = confidence %in% c(0, 1)) %>%
group_by(viz, n) %>%
summarise(
mean = mean(extreme),
sd = sd(extreme),
se = sd(extreme) / sqrt(length(extreme)))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## # A tibble: 8 x 5
## # Groups: viz [4]
## viz n mean sd se
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 p 50 0.159 0.366 0.0175
## 2 p 200 0.164 0.370 0.0172
## 3 CI 50 0.148 0.355 0.0169
## 4 CI 200 0.179 0.384 0.0178
## 5 gradient 50 0.127 0.334 0.0159
## 6 gradient 200 0.153 0.360 0.0167
## 7 violin 50 0.123 0.328 0.0157
## 8 violin 200 0.159 0.367 0.0170
data %>% group_by(id, viz, expertise) %>%
mutate(extreme = confidence %in% c(0, 1)) %>%
group_by(viz, expertise) %>%
summarise(
mean = mean(extreme),
sd = sd(extreme),
se = sd(extreme) / sqrt(length(extreme)))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## # A tibble: 16 x 5
## # Groups: viz [4]
## viz expertise mean sd se
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 p Stats/ML 0.196 0.398 0.0307
## 2 p VIS/HCI 0.184 0.388 0.0235
## 3 p Social science and humanities 0.145 0.352 0.0220
## 4 p Physical and life sciences 0.125 0.332 0.0230
## 5 CI Stats/ML 0.202 0.403 0.0311
## 6 CI VIS/HCI 0.184 0.388 0.0235
## 7 CI Social science and humanities 0.148 0.356 0.0223
## 8 CI Physical and life sciences 0.125 0.332 0.0230
## 9 gradient Stats/ML 0.208 0.407 0.0314
## 10 gradient VIS/HCI 0.140 0.347 0.0211
## 11 gradient Social science and humanities 0.109 0.313 0.0195
## 12 gradient Physical and life sciences 0.125 0.332 0.0230
## 13 violin Stats/ML 0.196 0.398 0.0307
## 14 violin VIS/HCI 0.162 0.369 0.0224
## 15 violin Social science and humanities 0.105 0.308 0.0192
## 16 violin Physical and life sciences 0.115 0.320 0.0222
The Stats/ML and VIS/HCI groups tend to give slightly more extreme answers, but the differences are quite small.
For modelling the data and the potential cliff effect we use a piecewise logit-normal model with the following pdf:
$$ p(x \mid \mu, \sigma, \alpha, \gamma) = \begin{cases} \alpha (1 - \gamma), & x = 0,\\ \alpha \gamma, & x = 1,\\ (1 - \alpha) f(x \mid \mu, \sigma), & 0 < x < 1, \end{cases} $$
where f(x ∣ μ, σ) is the density of the logit-normal distribution, i.e. logit(x) ∼ N(μ, σ²). Here α = P(x ∈ {0, 1}) is the probability of answering one of the extreme values (not at all confident or fully confident), and γ = P(x = 1 ∣ x ∈ {0, 1}) is the conditional probability of full confidence given that the answer is one of the extremes.
For μ, α, γ, and σ, we define the following linear predictors:
$$ \begin{align} \begin{split} \mu &\sim viz \cdot I(p < 0.05) \cdot logit(p) + viz \cdot I(p = 0.05) \\ & + (viz + I(p < 0.05) \cdot logit(p) + I(p = 0.05) \mid id),\\ \alpha &\sim p \cdot viz + (1 \mid id),\\ \gamma &\sim mo(p),\\ \sigma &\sim viz + (1 \mid id), \end{split} \end{align} $$ where p is a categorical variable defining the true p-value, logit(p) is a continuous variable of the logit-transformed p-value, mo(p) denotes a monotonic effect of the p-value, the dot corresponds to interaction (e.g., viz ⋅ I(p = 0.05) includes both the main effects and the two-way interaction), and (z ∣ id) denotes a participant-level random effect for variable z. As priors we used the relatively uninformative defaults of the brms package.
Now, in the presence of a cliff effect, we should observe a discontinuity in an otherwise linear relationship (on the logit-logit scale) between the true p-value and the participants’ confidence.
We also tested submodels of this model (omitting some of the interactions or random effects), and all of these models gave very similar results. However, this encompassing model integrates over the uncertainty regarding the parameter estimates (with a zero coefficient corresponding to the simpler model where the variable is omitted) and is in that sense “more Bayesian” than selecting one of the simpler models (note that we are not particularly interested in predictive performance).
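As an illustration of how such a comparison could be carried out with the loo package loaded above, here is a minimal sketch comparing the encompassing model (fit_exp1, fitted below) with a hypothetical simpler variant fit_exp1_simpler (e.g., with some interaction terms dropped); the submodel name is illustrative and not part of the original scripts:
# Hypothetical model comparison via approximate leave-one-out cross-validation
loo_full <- loo(fit_exp1)
loo_simpler <- loo(fit_exp1_simpler)  # illustrative submodel, not defined in this document
loo_compare(loo_full, loo_simpler)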
Now we create the necessary functions for our model:
stan_funs <- "
real logit_p_gaussian_lpdf(real y, real mu, real sigma,
real zoi, real coi) {
if (y == 0) {
return bernoulli_lpmf(1 | zoi) + bernoulli_lpmf(0 | coi);
} else if (y == 1) {
return bernoulli_lpmf(1 | zoi) + bernoulli_lpmf(1 | coi);
} else {
return bernoulli_lpmf(0 | zoi) + normal_lpdf(logit(y) | mu, sigma);
}
}
real logit_p_gaussian_rng(real y, real mu, real sigma,
real zoi, real coi) {
// 0 or 1
int zero_one = bernoulli_rng(zoi);
if (zero_one == 1) {
// casting to real
int one = bernoulli_rng(coi);
if (one == 1) {
return 1.0;
} else {
return 0.0;
}
} else {
return inv_logit(normal_rng(mu, sigma));
}
}
"
log_lik_logit_p_gaussian <- function(i, draws) {
# mu <- draws$dpars$mu[, i]
# zoi <- draws$dpars$zoi[, i]
# coi <- draws$dpars$coi[, i]
# sigma <- draws$dpars$sigma
# y <- draws$data$Y[i]
mu <- brms:::get_dpar(draws, "mu", i = i)
zoi <- brms:::get_dpar(draws, "zoi", i = i)
coi <- brms:::get_dpar(draws, "coi", i = i)
sigma <- brms:::get_dpar(draws, "sigma", i = i)
y <- draws$data$Y[i]
if (y == 0) {
dbinom(1, 1, zoi, TRUE) + dbinom(0, 1, coi, TRUE)
} else if (y == 1) {
dbinom(1, 1, zoi, TRUE) + dbinom(1, 1, coi, TRUE)
} else {
dbinom(0, 1, zoi, TRUE) + dnorm(qlogis(y), mu, sigma, TRUE)
}
}
predict_logit_p_gaussian <- function(i, draws, ...) {
mu <- brms:::get_dpar(draws, "mu", i = i)
zoi <- brms:::get_dpar(draws, "zoi", i = i)
coi <- brms:::get_dpar(draws, "coi", i = i)
sigma <- brms:::get_dpar(draws, "sigma", i = i)
zero_one <- rbinom(length(zoi), 1, zoi)
ifelse(zero_one, rbinom(length(coi), 1, coi), plogis(rnorm(length(mu), mu, sigma)))
}
fitted_logit_p_gaussian <- function(draws) {
mu <- draws$dpars$mu
zoi <- draws$dpars$zoi
coi <- draws$dpars$coi
sigma <- draws$dpars$sigma
# no analytical solution for the mean of logistic normal distribution, rely on simulation
for (i in 1:ncol(mu)) {
for(j in 1:nrow(mu)) {
mu[j, i] <- mean(plogis(rnorm(1000, mu[j, i], sigma[j])))
}
}
zoi * coi + (1 - zoi) * mu
}
logit_p_gaussian <- custom_family(
"logit_p_gaussian",
dpars = c("mu", "sigma", "zoi", "coi"),
links = c("identity", "log", "logit", "logit"),
lb = c(NA, 0, 0, 0), ub = c(NA, NA, 1, 1),
type = "real",
log_lik = log_lik_logit_p_gaussian,
predict = predict_logit_p_gaussian,
fitted = fitted_logit_p_gaussian)
## Warning: Argument 'predict' is deprecated. Please use argument
## 'posterior_predict' instead.
## Warning: Argument 'fitted' is deprecated. Please use argument 'posterior_epred'
## instead.
And create a few additional variables:
data <- data %>%
mutate(
logit_p = qlogis(p),
p_lt0.05 = factor(p < 0.05, levels = c(TRUE, FALSE), labels = c("Yes", "No")),
p_eq0.05 = factor(p == 0.05, levels = c(TRUE, FALSE), labels = c("Yes", "No")),
cat_p = recode_factor(true_p,
"0.06" = ">0.05", "0.1" = ">0.05", "0.5" = ">0.05", "0.8" = ">0.05",
.ordered = TRUE))
fit_exp1 <- brm(bf(
confidence ~
viz * p_lt0.05 * logit_p +
viz * p_eq0.05 +
(viz + p_lt0.05 * logit_p + p_eq0.05 | id),
zoi ~
viz * true_p + (viz | id),
coi ~ mo(cat_p),
sigma ~ viz + (1 | id)),
data = data,
family = logit_p_gaussian,
stanvars = stanvar(scode = stan_funs, block = "functions"),
chains = 4, cores = 4, iter = 2000, init = 0,
save_warmup = FALSE, save_all_pars = TRUE, refresh = 0)
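Since sampling this model takes a while, it can be convenient to cache the fitted object between runs; a minimal sketch, where the file path is an assumption rather than part of the original workflow:
# Optional caching so re-knitting does not require re-sampling the model
# (the file path below is illustrative, not part of the original scripts)
saveRDS(fit_exp1, file = "experiment1/fit_exp1.rds")
# fit_exp1 <- readRDS("experiment1/fit_exp1.rds")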
First, let us check the parameter estimates of the model:
fit_exp1
## Family: logit_p_gaussian
## Links: mu = identity; sigma = log; zoi = logit; coi = logit
## Formula: confidence ~ viz * p_lt0.05 * logit_p + viz * p_eq0.05 + (viz + p_lt0.05 * logit_p + p_eq0.05 | id)
## zoi ~ viz * true_p + (viz | id)
## coi ~ mo(cat_p)
## sigma ~ viz + (1 | id)
## Data: data (Number of observations: 3616)
## Samples: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~id (Number of levels: 113)
## Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept) 1.39 0.13 1.15 1.66 1.00
## sd(vizCI) 0.66 0.07 0.52 0.81 1.00
## sd(vizgradient) 0.89 0.09 0.73 1.07 1.00
## sd(vizviolin) 0.88 0.08 0.73 1.06 1.00
## sd(p_lt0.05No) 0.86 0.12 0.65 1.10 1.00
## sd(logit_p) 0.20 0.03 0.15 0.25 1.00
## sd(p_eq0.05No) 0.18 0.07 0.03 0.30 1.01
## sd(p_lt0.05No:logit_p) 0.27 0.03 0.21 0.33 1.00
## sd(zoi_Intercept) 2.06 0.23 1.65 2.54 1.00
## sd(zoi_vizCI) 0.26 0.18 0.01 0.67 1.00
## sd(zoi_vizgradient) 0.40 0.28 0.02 1.04 1.00
## sd(zoi_vizviolin) 0.47 0.30 0.02 1.14 1.00
## sd(sigma_Intercept) 0.40 0.03 0.34 0.46 1.00
## cor(Intercept,vizCI) -0.08 0.13 -0.33 0.19 1.00
## cor(Intercept,vizgradient) -0.36 0.11 -0.57 -0.13 1.00
## cor(vizCI,vizgradient) 0.43 0.11 0.20 0.62 1.00
## cor(Intercept,vizviolin) -0.21 0.12 -0.43 0.03 1.00
## cor(vizCI,vizviolin) 0.44 0.11 0.19 0.63 1.00
## cor(vizgradient,vizviolin) 0.75 0.06 0.61 0.86 1.00
## cor(Intercept,p_lt0.05No) -0.14 0.14 -0.40 0.15 1.00
## cor(vizCI,p_lt0.05No) -0.18 0.15 -0.46 0.12 1.00
## cor(vizgradient,p_lt0.05No) -0.12 0.15 -0.40 0.16 1.00
## cor(vizviolin,p_lt0.05No) -0.09 0.15 -0.37 0.19 1.00
## cor(Intercept,logit_p) 0.05 0.14 -0.24 0.32 1.00
## cor(vizCI,logit_p) -0.04 0.15 -0.33 0.24 1.00
## cor(vizgradient,logit_p) 0.04 0.14 -0.24 0.32 1.00
## cor(vizviolin,logit_p) 0.12 0.14 -0.16 0.38 1.00
## cor(p_lt0.05No,logit_p) 0.34 0.17 0.00 0.67 1.00
## cor(Intercept,p_eq0.05No) -0.33 0.23 -0.71 0.18 1.00
## cor(vizCI,p_eq0.05No) -0.26 0.25 -0.69 0.27 1.00
## cor(vizgradient,p_eq0.05No) 0.14 0.24 -0.36 0.59 1.00
## cor(vizviolin,p_eq0.05No) 0.12 0.25 -0.38 0.59 1.00
## cor(p_lt0.05No,p_eq0.05No) 0.30 0.24 -0.23 0.70 1.00
## cor(logit_p,p_eq0.05No) 0.46 0.24 -0.08 0.83 1.00
## cor(Intercept,p_lt0.05No:logit_p) -0.33 0.12 -0.55 -0.08 1.00
## cor(vizCI,p_lt0.05No:logit_p) -0.09 0.14 -0.35 0.20 1.00
## cor(vizgradient,p_lt0.05No:logit_p) -0.04 0.14 -0.31 0.24 1.00
## cor(vizviolin,p_lt0.05No:logit_p) -0.15 0.13 -0.41 0.12 1.00
## cor(p_lt0.05No,p_lt0.05No:logit_p) 0.69 0.09 0.49 0.83 1.00
## cor(logit_p,p_lt0.05No:logit_p) -0.17 0.15 -0.44 0.14 1.00
## cor(p_eq0.05No,p_lt0.05No:logit_p) -0.04 0.25 -0.50 0.47 1.00
## cor(zoi_Intercept,zoi_vizCI) -0.21 0.43 -0.89 0.71 1.00
## cor(zoi_Intercept,zoi_vizgradient) 0.26 0.40 -0.63 0.88 1.00
## cor(zoi_vizCI,zoi_vizgradient) -0.04 0.44 -0.82 0.79 1.00
## cor(zoi_Intercept,zoi_vizviolin) 0.35 0.37 -0.53 0.89 1.00
## cor(zoi_vizCI,zoi_vizviolin) -0.07 0.43 -0.83 0.77 1.00
## cor(zoi_vizgradient,zoi_vizviolin) 0.31 0.44 -0.66 0.91 1.00
## Bulk_ESS Tail_ESS
## sd(Intercept) 3073 4573
## sd(vizCI) 2490 4291
## sd(vizgradient) 1715 3516
## sd(vizviolin) 1701 3349
## sd(p_lt0.05No) 1564 2522
## sd(logit_p) 1213 2368
## sd(p_eq0.05No) 1424 1125
## sd(p_lt0.05No:logit_p) 1628 3823
## sd(zoi_Intercept) 1989 3544
## sd(zoi_vizCI) 2797 3301
## sd(zoi_vizgradient) 1807 3129
## sd(zoi_vizviolin) 1600 2897
## sd(sigma_Intercept) 2333 3586
## cor(Intercept,vizCI) 1501 3162
## cor(Intercept,vizgradient) 1455 2227
## cor(vizCI,vizgradient) 1496 2984
## cor(Intercept,vizviolin) 1420 2666
## cor(vizCI,vizviolin) 1323 2669
## cor(vizgradient,vizviolin) 1917 3218
## cor(Intercept,p_lt0.05No) 1695 3423
## cor(vizCI,p_lt0.05No) 1529 2717
## cor(vizgradient,p_lt0.05No) 1789 3981
## cor(vizviolin,p_lt0.05No) 1962 3812
## cor(Intercept,logit_p) 1454 2774
## cor(vizCI,logit_p) 1674 3196
## cor(vizgradient,logit_p) 1754 3019
## cor(vizviolin,logit_p) 1857 3309
## cor(p_lt0.05No,logit_p) 793 1494
## cor(Intercept,p_eq0.05No) 5932 4361
## cor(vizCI,p_eq0.05No) 3038 3885
## cor(vizgradient,p_eq0.05No) 4685 5091
## cor(vizviolin,p_eq0.05No) 5149 5404
## cor(p_lt0.05No,p_eq0.05No) 3138 3040
## cor(logit_p,p_eq0.05No) 2989 2534
## cor(Intercept,p_lt0.05No:logit_p) 1645 3425
## cor(vizCI,p_lt0.05No:logit_p) 2069 3914
## cor(vizgradient,p_lt0.05No:logit_p) 2146 3735
## cor(vizviolin,p_lt0.05No:logit_p) 2450 3733
## cor(p_lt0.05No,p_lt0.05No:logit_p) 1474 2842
## cor(logit_p,p_lt0.05No:logit_p) 1264 2666
## cor(p_eq0.05No,p_lt0.05No:logit_p) 1465 2158
## cor(zoi_Intercept,zoi_vizCI) 7699 5789
## cor(zoi_Intercept,zoi_vizgradient) 6804 5292
## cor(zoi_vizCI,zoi_vizgradient) 2972 4635
## cor(zoi_Intercept,zoi_vizviolin) 5690 4752
## cor(zoi_vizCI,zoi_vizviolin) 3177 4656
## cor(zoi_vizgradient,zoi_vizviolin) 2295 5500
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept -0.07 0.24 -0.55 0.39 1.00
## sigma_Intercept -0.08 0.05 -0.18 0.02 1.00
## zoi_Intercept -2.06 0.36 -2.77 -1.39 1.00
## coi_Intercept -3.76 0.37 -4.53 -3.11 1.00
## vizCI 0.69 0.26 0.18 1.21 1.00
## vizgradient -0.98 0.27 -1.51 -0.45 1.00
## vizviolin -0.60 0.25 -1.10 -0.11 1.00
## p_lt0.05No -1.53 0.20 -1.91 -1.14 1.00
## logit_p -0.42 0.04 -0.50 -0.35 1.00
## p_eq0.05No -0.48 0.10 -0.68 -0.28 1.00
## vizCI:p_lt0.05No -0.70 0.23 -1.15 -0.25 1.00
## vizgradient:p_lt0.05No 0.72 0.23 0.28 1.17 1.00
## vizviolin:p_lt0.05No 0.51 0.21 0.08 0.91 1.00
## vizCI:logit_p 0.07 0.04 -0.02 0.15 1.00
## vizgradient:logit_p -0.11 0.04 -0.19 -0.02 1.00
## vizviolin:logit_p -0.09 0.04 -0.17 -0.01 1.00
## p_lt0.05No:logit_p -0.23 0.05 -0.33 -0.13 1.00
## vizCI:p_eq0.05No 0.06 0.13 -0.21 0.33 1.00
## vizgradient:p_eq0.05No 0.35 0.14 0.08 0.61 1.00
## vizviolin:p_eq0.05No 0.31 0.13 0.05 0.56 1.00
## vizCI:p_lt0.05No:logit_p -0.10 0.06 -0.21 0.01 1.00
## vizgradient:p_lt0.05No:logit_p 0.11 0.06 -0.00 0.22 1.00
## vizviolin:p_lt0.05No:logit_p 0.04 0.05 -0.06 0.14 1.00
## sigma_vizCI -0.13 0.05 -0.22 -0.04 1.00
## sigma_vizgradient -0.11 0.05 -0.21 -0.02 1.00
## sigma_vizviolin -0.27 0.05 -0.37 -0.17 1.00
## zoi_vizCI 0.71 0.38 -0.05 1.44 1.00
## zoi_vizgradient 0.18 0.41 -0.63 0.97 1.00
## zoi_vizviolin 0.58 0.41 -0.24 1.36 1.00
## zoi_true_p0.01 -1.39 0.46 -2.31 -0.50 1.00
## zoi_true_p0.04 -2.48 0.58 -3.65 -1.39 1.00
## zoi_true_p0.05 -2.78 0.64 -4.12 -1.58 1.00
## zoi_true_p0.06 -1.67 0.49 -2.66 -0.73 1.00
## zoi_true_p0.1 -1.13 0.44 -2.00 -0.26 1.00
## zoi_true_p0.5 0.76 0.37 0.03 1.48 1.00
## zoi_true_p0.8 1.20 0.37 0.49 1.93 1.00
## zoi_vizCI:true_p0.01 0.38 0.60 -0.77 1.57 1.00
## zoi_vizgradient:true_p0.01 -1.24 0.73 -2.67 0.16 1.00
## zoi_vizviolin:true_p0.01 -1.30 0.70 -2.68 0.04 1.00
## zoi_vizCI:true_p0.04 -0.40 0.79 -1.92 1.12 1.00
## zoi_vizgradient:true_p0.04 -1.11 0.95 -3.02 0.71 1.00
## zoi_vizviolin:true_p0.04 -2.16 1.07 -4.42 -0.22 1.00
## zoi_vizCI:true_p0.05 -0.35 0.87 -2.04 1.34 1.00
## zoi_vizgradient:true_p0.05 -1.37 1.11 -3.69 0.69 1.00
## zoi_vizviolin:true_p0.05 -0.89 0.92 -2.74 0.89 1.00
## zoi_vizCI:true_p0.06 -1.45 0.75 -2.94 0.02 1.00
## zoi_vizgradient:true_p0.06 -0.96 0.75 -2.46 0.54 1.00
## zoi_vizviolin:true_p0.06 -1.69 0.79 -3.29 -0.17 1.00
## zoi_vizCI:true_p0.1 -0.91 0.64 -2.19 0.32 1.00
## zoi_vizgradient:true_p0.1 -0.61 0.65 -1.86 0.65 1.00
## zoi_vizviolin:true_p0.1 -1.56 0.68 -2.93 -0.23 1.00
## zoi_vizCI:true_p0.5 -1.30 0.53 -2.33 -0.27 1.00
## zoi_vizgradient:true_p0.5 -0.68 0.54 -1.75 0.36 1.00
## zoi_vizviolin:true_p0.5 -1.43 0.54 -2.48 -0.38 1.00
## zoi_vizCI:true_p0.8 -1.01 0.51 -2.00 -0.03 1.00
## zoi_vizgradient:true_p0.8 -0.43 0.53 -1.47 0.59 1.00
## zoi_vizviolin:true_p0.8 -0.86 0.53 -1.90 0.16 1.00
## coi_mocat_p 1.44 0.11 1.23 1.67 1.00
## Bulk_ESS Tail_ESS
## Intercept 1378 2533
## sigma_Intercept 2204 3666
## zoi_Intercept 1085 2768
## coi_Intercept 7238 4950
## vizCI 2302 4077
## vizgradient 1864 3872
## vizviolin 1870 3869
## p_lt0.05No 1400 2623
## logit_p 1659 3131
## p_eq0.05No 3353 4268
## vizCI:p_lt0.05No 1770 3330
## vizgradient:p_lt0.05No 1637 3012
## vizviolin:p_lt0.05No 1701 2999
## vizCI:logit_p 1951 3760
## vizgradient:logit_p 1757 3329
## vizviolin:logit_p 1839 3098
## p_lt0.05No:logit_p 1701 3114
## vizCI:p_eq0.05No 4091 4778
## vizgradient:p_eq0.05No 4245 4755
## vizviolin:p_eq0.05No 4045 5249
## vizCI:p_lt0.05No:logit_p 2137 4735
## vizgradient:p_lt0.05No:logit_p 1918 3312
## vizviolin:p_lt0.05No:logit_p 2140 4209
## sigma_vizCI 5196 5605
## sigma_vizgradient 4838 5862
## sigma_vizviolin 4912 5705
## zoi_vizCI 1650 3363
## zoi_vizgradient 1884 3679
## zoi_vizviolin 1918 3855
## zoi_true_p0.01 2487 4262
## zoi_true_p0.04 2792 4238
## zoi_true_p0.05 3216 4704
## zoi_true_p0.06 2455 3950
## zoi_true_p0.1 2363 3479
## zoi_true_p0.5 1952 3900
## zoi_true_p0.8 1735 3448
## zoi_vizCI:true_p0.01 2771 4886
## zoi_vizgradient:true_p0.01 3431 5423
## zoi_vizviolin:true_p0.01 3218 4658
## zoi_vizCI:true_p0.04 3130 4252
## zoi_vizgradient:true_p0.04 3865 5265
## zoi_vizviolin:true_p0.04 3802 5484
## zoi_vizCI:true_p0.05 3754 4785
## zoi_vizgradient:true_p0.05 4543 5915
## zoi_vizviolin:true_p0.05 3614 4876
## zoi_vizCI:true_p0.06 3467 5130
## zoi_vizgradient:true_p0.06 2993 5057
## zoi_vizviolin:true_p0.06 3283 4874
## zoi_vizCI:true_p0.1 2998 4678
## zoi_vizgradient:true_p0.1 2709 5300
## zoi_vizviolin:true_p0.1 3228 4964
## zoi_vizCI:true_p0.5 2475 4631
## zoi_vizgradient:true_p0.5 2364 4379
## zoi_vizviolin:true_p0.5 2399 4525
## zoi_vizCI:true_p0.8 2168 4567
## zoi_vizgradient:true_p0.8 2290 4386
## zoi_vizviolin:true_p0.8 2439 4374
## coi_mocat_p 7002 4889
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## coi_mocat_p1[1] 0.94 0.03 0.87 0.99 1.00 11484 5684
## coi_mocat_p1[2] 0.02 0.02 0.00 0.06 1.00 9830 4602
## coi_mocat_p1[3] 0.02 0.02 0.00 0.06 1.00 10588 4927
## coi_mocat_p1[4] 0.02 0.02 0.00 0.09 1.00 8884 5567
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Now we look at some figures. First we draw some samples from the posterior predictive distribution and see how well our simulated replications match our data:
pp_check(fit_exp1, type = "hist", nsamples = 11)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
We see that the histograms of the replicated datasets are similar to the observed one, with perhaps a slight exaggeration of the tails. Next, we look at the median confidence of the replicated datasets grouped by the underlying p-value:
pp_check(fit_exp1, type = "stat_grouped", group = "true_p", stat = "median")
## Using all posterior samples for ppc type 'stat_grouped' by default.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Now grouping based on visualization:
pp_check(fit_exp1, type = "stat_grouped", group = "viz", stat = "mean")
## Using all posterior samples for ppc type 'stat_grouped' by default.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Noting the scale on the x-axis, our histograms look reasonable given our data, although there are some subgroups where our model slightly over- or underestimates compared to our data, especially in the violin CI group (reasonable changes to our model, such as dropping some interaction terms, did not improve this). When we perform the same posterior checks for an average participant (with the random effects zeroed out), we get very good results:
pp_check(fit_exp1, type = "stat_grouped", group = "true_p", stat = "median", re_formula = NA)
## Using all posterior samples for ppc type 'stat_grouped' by default.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
pp_check(fit_exp1, type = "stat_grouped", group = "viz", stat = "mean", re_formula = NA)
## Using all posterior samples for ppc type 'stat_grouped' by default.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Now we are ready to analyze the results. First, the posterior curves of the confidence given the underlying p-value:
comb_exp1 <- fit_exp1$data %>%
data_grid(viz, logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %>%
filter(interaction(logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %in%
unique(interaction(
fit_exp1$data$logit_p, fit_exp1$data$p_lt0.05,
fit_exp1$data$p_eq0.05, fit_exp1$data$cat_p,
fit_exp1$data$true_p)))
f_mu_exp1 <- posterior_epred(fit_exp1, newdata = comb_exp1, re_formula = NA)
d <- data.frame(value = c(f_mu_exp1),
p = rep(comb_exp1$true_p, each = nrow(f_mu_exp1)),
viz = rep(comb_exp1$viz, each = nrow(f_mu_exp1)),
iter = 1:nrow(f_mu_exp1))
levels(d$viz) <- c("Textual", "Classic CI", "Gradient CI", "Violin CI")
sumr <- d %>% group_by(viz, p) %>%
summarise(Estimate = mean(value),
Q2.5 = quantile(value, 0.025),
Q97.5 = quantile(value, 0.975)) %>%
mutate(p = as.numeric(levels(p))[p])
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
cols <- c("Textual" = "#D55E00", "Classic CI" = "#0072B2",
"Gradient CI" = "#009E73", "Violin CI" = "#CC79A7")
x_ticks <- c(0.001, 0.01, 0.04, 0.06, 0.1, 0.5, 0.8)
y_ticks <- c(0.05, seq(0.1, 0.9, by = 0.1), 0.95)
dodge <- 0.19
p1 <- sumr %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14,
margin = margin(t = -0.1, r = 0, b = -0.1, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14,
margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p2 <- sumr %>% filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p <- p1 + coord_cartesian(xlim = c(0.001, 0.9), ylim = c(0.045, 0.95)) +
annotation_custom(
ggplotGrob(p2),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p
The confidence level with the traditional CI is the most constant of all techniques within the “statistically significant” region (p < 0.05), but there is a large drop when moving to p > 0.05, even larger than with the textual p-value representation, which behaves nearly identically to the Violin CI plot until p = 0.05, after which the confidence with the p-value representation drops below all other techniques. The Gradient CI and Violin CI plots behave similarly, except that the confidence level for the Gradient CI plot is consistently below that of the Violin CI plot.
The probability curves for an extreme answer show that the traditional CI more readily produces extreme answers when p < 0.05 (in which case the extreme answer is likely full confidence), whereas the p-value is more likely to lead to an extreme answer (zero confidence) when p > 0.05. The differences between techniques nevertheless seem quite small compared to the overall variation in the estimates.
f_zoi_exp1_sumr <- fitted(fit_exp1, newdata = comb_exp1,
re_formula = NA, dpar = "zoi")
df_01_exp1 <- data.frame(
p = plogis(comb_exp1$logit_p),
viz = comb_exp1$viz,
f_zoi_exp1_sumr)
levels(df_01_exp1$viz) <-
c("Textual", "Classic CI", "Gradient CI", "Violin CI")
y_ticks <- c(0.0001, 0.01, seq(0.1,0.9,by=0.2))
p <- df_01_exp1 %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_linerange(aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(width=0.19)) +
geom_line(alpha=0.5, position = position_dodge(width=0.19)) +
ylab("Probability of all-or-none answer") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
theme_classic() +
scale_y_continuous(trans = "logit",
breaks = y_ticks, labels = y_ticks, minor_breaks = NULL) +
scale_x_continuous(trans = "logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10), legend.position = "bottom",
axis.title.x = element_text(size = 12),
axis.text.y = element_text(size = 10), axis.title.y = element_text(size = 12),
legend.text=element_text(size = 10), strip.text.x = element_text(size = 10))
p
Finally, we can compute the average drop in perceived confidence when moving from p = 0.04 to p = 0.06:
d %>% group_by(viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
summarise(mean = mean(difference), sd = sd(difference),
"2.5%" = quantile(difference, 0.025),
"97.5" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 5
## viz mean sd `2.5%` `97.5`
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Textual 0.232 0.0253 0.182 0.282
## 2 Classic CI 0.291 0.0232 0.246 0.337
## 3 Gradient CI 0.150 0.0242 0.103 0.197
## 4 Violin CI 0.151 0.0220 0.109 0.195
Let’s also visualize this:
p <- d %>% group_by(viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
ggplot(aes(x = difference, fill = viz, colour = viz)) +
geom_density(bw = 0.01, alpha = 0.6) +
theme_classic() +
scale_fill_manual("Representation", values = cols) +
scale_colour_manual("Representation", values = cols) +
ylab("Posterior density") +
xlab("E[confidence(p=0.04) - confidence(p=0.06)]") +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.text = element_text(size = 14))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
p
Note that the cliff effects between viz styles are not independent, i.e., if there is a large cliff effect with the Violin CI then the cliff effect with the p-value is likely large as well. This can be seen from the posterior probabilities that the cliff effect is larger with the first representation than with the second:
postprob <- d %>% group_by(viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
group_by(iter) %>%
mutate(p_vs_ci = difference[viz == "Textual"] - difference[viz == "Classic CI"],
p_vs_gradient = difference[viz == "Textual"] - difference[viz == "Gradient CI"],
p_vs_violin = difference[viz == "Textual"] - difference[viz == "Violin CI"],
ci_vs_gradient = difference[viz == "Classic CI"] - difference[viz == "Gradient CI"],
ci_vs_violin = difference[viz == "Classic CI"] - difference[viz == "Violin CI"],
gradient_vs_violin = difference[viz == "Gradient CI"] -
difference[viz == "Violin CI"]) %>%
ungroup() %>% summarise(
"P(p > CI)" = mean(p_vs_ci > 0),
"P(p > gradient)" = mean(p_vs_gradient > 0),
"P(p > violin)" = mean(p_vs_violin > 0),
"P(CI > gradient)" = mean(ci_vs_gradient > 0),
"P(CI > violin)" = mean(ci_vs_violin > 0),
"P(gradient > violin)" = mean(gradient_vs_violin > 0),
"P(p > CI)" = mean(p_vs_ci > 0))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
round(t(as.data.frame(postprob)), 2)
## [,1]
## P(p > CI) 0.01
## P(p > gradient) 1.00
## P(p > violin) 1.00
## P(CI > gradient) 1.00
## P(CI > violin) 1.00
## P(gradient > violin) 0.49
Now we consider an expanded model with expertise as a predictor:
fit_expertise <- brm(bf(
confidence ~
expertise * viz * p_lt0.05 * logit_p +
expertise * viz * p_eq0.05 +
(viz + p_lt0.05 * logit_p + p_eq0.05 | id),
zoi ~
expertise * viz + viz * true_p + (viz | id),
coi ~ mo(cat_p),
sigma ~ expertise * viz + (1 | id)),
data = data,
family = logit_p_gaussian,
stanvars = stanvar(scode = stan_funs, block = "functions"),
chains = 4, cores = 4, iter = 2000, init = 0,
save_warmup = FALSE, save_all_pars = TRUE, refresh = 0)
comb_exp1 <- fit_expertise$data %>%
data_grid(expertise, viz, logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %>%
filter(interaction(expertise, logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %in%
unique(interaction(fit_expertise$data$expertise,
fit_expertise$data$logit_p, fit_expertise$data$p_lt0.05,
fit_expertise$data$p_eq0.05, fit_expertise$data$cat_p,
fit_expertise$data$true_p)))
f_mu_exp1 <- posterior_epred(fit_expertise, newdata = comb_exp1, re_formula = NA)
d <- data.frame(value = c(f_mu_exp1),
p = rep(comb_exp1$true_p, each = nrow(f_mu_exp1)),
viz = rep(comb_exp1$viz, each = nrow(f_mu_exp1)),
expertise = rep(comb_exp1$expertise, each = nrow(f_mu_exp1)),
iter = 1:nrow(f_mu_exp1))
levels(d$viz) <- c("Textual", "Classic CI", "Gradient CI", "Violin CI")
Here are posterior curves for the four different groups:
sumr <- d %>% group_by(viz, p, expertise) %>%
summarise(Estimate = mean(value),
Q2.5 = quantile(value, 0.025),
Q97.5 = quantile(value, 0.975)) %>%
mutate(p = as.numeric(levels(p))[p])
## `summarise()` regrouping output by 'viz', 'p' (override with `.groups` argument)
x_ticks <- c(0.001, 0.01, 0.04, 0.06, 0.1, 0.5, 0.8)
y_ticks <- c(0.05, seq(0.1, 0.9, by = 0.1), 0.95)
dodge <- 0.19
p11 <- sumr %>% filter(expertise == "Stats/ML") %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.margin = margin(t = -0.1, b = 0, unit = "cm"),
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14,
margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14,
margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07),
ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p21 <- sumr %>% filter(expertise == "Stats/ML") %>%
filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
yrange <- c(min(sumr$Q2.5)-0.001, max(sumr$Q97.5) +0.001)
p1 <- p11 + coord_cartesian(xlim = c(0.001, 0.9),
ylim = yrange) +
annotation_custom(
ggplotGrob(p21),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p12 <- sumr %>% filter(expertise == "VIS/HCI") %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks, minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14, margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14, margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p22 <- sumr %>% filter(expertise == "VIS/HCI") %>%
filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p2 <- p12 + coord_cartesian(xlim = c(0.001, 0.9), ylim = yrange) +
annotation_custom(
ggplotGrob(p22),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p13 <- sumr %>% filter(expertise == "Social science and humanities") %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks, minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14, margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14, margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p23 <- sumr %>% filter(expertise == "Social science and humanities") %>%
filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p3 <- p13 + coord_cartesian(xlim = c(0.001, 0.9), ylim = yrange) +
annotation_custom(
ggplotGrob(p23),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p14 <- sumr %>% filter(expertise == "Physical and life sciences") %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks, minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14, margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14, margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p24 <- sumr %>% filter(expertise == "Physical and life sciences") %>%
filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p4 <- p14 + coord_cartesian(xlim = c(0.001, 0.9), ylim = yrange) +
annotation_custom(
ggplotGrob(p24),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
library(patchwork)
p <- (p1 + ggtitle("Stats/ML")) + (p2 + ggtitle("VIS/HCI")) +
(p3 + ggtitle("Social sciences and humanities")) +
(p4 + ggtitle("Physical and life sciences"))
p
There are some differences in the confidence curves between the groups: in the physical and life sciences group the visualization has only a small effect on the confidence curves; the gradient CI and violin CI produce very linear curves in the VIS/HCI group; and there is a very large drop in confidence for the classic CI in the Stats/ML group. However, the ordering in terms of the cliff effect is the same in all groups.
We can also draw the same figure when averaging over the groups:
sumr <- d %>% group_by(viz, p) %>%
summarise(Estimate = mean(value),
Q2.5 = quantile(value, 0.025),
Q97.5 = quantile(value, 0.975)) %>%
mutate(p = as.numeric(levels(p))[p])
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
p1 <- sumr %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14,
margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14,
margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p2 <- sumr %>% filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p <- p1 + coord_cartesian(xlim = c(0.001, 0.9), ylim = yrange) +
annotation_custom(
ggplotGrob(p2),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p
We see that the results are very similar to those of the model without the expertise variable, except that the credible intervals in the above figure are naturally somewhat wider when we average over expertise groups with different overall levels (in the model without expertise, these differences are captured by the participant-level effects, which are then zeroed out when considering an average participant).
Now the potential cliff effect:
d %>% group_by(viz, iter,expertise) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
ggplot(aes(x = difference, fill = viz, colour = viz)) +
geom_density(bw = 0.01, alpha = 0.6) +
theme_classic() +
scale_fill_manual("Representation", values = cols) +
scale_color_manual("Representation", values = cols) +
ylab("Posterior density") +
xlab("Cliff effect") +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14,
margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14,
margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +facet_wrap(~expertise)
## `summarise()` regrouping output by 'viz', 'iter' (override with `.groups` argument)
d %>% group_by(expertise, viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
summarise(mean = mean(difference), sd = sd(difference),
"2.5%" = quantile(difference, 0.025),
"97.5" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'expertise', 'viz' (override with `.groups` argument)
## `summarise()` regrouping output by 'expertise' (override with `.groups` argument)
## # A tibble: 16 x 6
## # Groups: expertise [4]
## expertise viz mean sd `2.5%` `97.5`
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Stats/ML Textual 0.305 0.0547 0.195 0.409
## 2 Stats/ML Classic CI 0.413 0.0450 0.324 0.502
## 3 Stats/ML Gradient CI 0.212 0.0518 0.112 0.315
## 4 Stats/ML Violin CI 0.214 0.0462 0.124 0.304
## 5 VIS/HCI Textual 0.214 0.0502 0.117 0.311
## 6 VIS/HCI Classic CI 0.240 0.0409 0.162 0.323
## 7 VIS/HCI Gradient CI 0.0605 0.0417 -0.0202 0.143
## 8 VIS/HCI Violin CI 0.0956 0.0319 0.0347 0.161
## 9 Social science and humanities Textual 0.235 0.0400 0.158 0.312
## 10 Social science and humanities Classic CI 0.278 0.0412 0.196 0.358
## 11 Social science and humanities Gradient CI 0.123 0.0408 0.0424 0.203
## 12 Social science and humanities Violin CI 0.138 0.0423 0.0567 0.223
## 13 Physical and life sciences Textual 0.202 0.0447 0.114 0.290
## 14 Physical and life sciences Classic CI 0.264 0.0415 0.183 0.345
## 15 Physical and life sciences Gradient CI 0.204 0.0407 0.125 0.286
## 16 Physical and life sciences Violin CI 0.171 0.0378 0.0991 0.245
We see some differences between the group-wise estimates (but note the standard deviations). For example, the drop in confidence seems to be smallest in the VIS/HCI group with the gradient and violin CIs (while the p-value and classic CI perform relatively similarly), and, somewhat surprisingly, the drops are largest in the Stats/ML group for all representation styles. However, in all groups the classic CI shows the largest drop, with the textual p-value second (except for a virtual tie with the gradient CI in the Physical and life sciences group), and the gradient and violin CIs show relatively equal drops.
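As an illustration (a sketch, not part of the original analysis), the posterior probability that the drop is larger in the Stats/ML group than in the VIS/HCI group can be computed directly from the posterior draws in d:
d %>% group_by(viz, expertise, iter) %>%
  summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
  group_by(viz, iter) %>%
  summarise(delta = difference[expertise == "Stats/ML"] -
              difference[expertise == "VIS/HCI"]) %>%
  group_by(viz) %>%
  summarise("P(drop Stats/ML > drop VIS/HCI)" = round(mean(delta > 0), 2))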
When we average over these groups to obtain marginal means we see almost identical results compared to the model without expertise:
d %>% group_by(expertise, viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
group_by(viz) %>%
summarise(mean = mean(difference), sd = sd(difference),
"2.5%" = quantile(difference, 0.025),
"97.5" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'expertise', 'viz' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 5
## viz mean sd `2.5%` `97.5`
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Textual 0.239 0.0622 0.128 0.376
## 2 Classic CI 0.299 0.0797 0.180 0.470
## 3 Gradient CI 0.150 0.0762 0.00689 0.287
## 4 Violin CI 0.155 0.0590 0.0513 0.274
Density plots of course show multimodality due to group differences:
d %>% group_by(viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
ggplot(aes(x = difference, fill = viz, colour = viz)) +
geom_density(bw = 0.01, alpha = 0.6) +
theme_classic() +
scale_fill_manual("Representation",
values = cols[1:4]) +
scale_colour_manual("Representation",
values = cols[1:4]) +
ylab("Posterior density") +
xlab("Cliff effect") +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14,
margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14,
margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14))
## `summarise()` regrouping output by 'viz', 'iter' (override with `.groups` argument)
Relating these results to the descriptive statistics of the data, in particular the cliff effect for each expertise group and visualization style, it should be noted that especially in the VIS/HCI group a large proportion of answers show a clear “negative drop” in confidence around p = 0.05, which could be considered an equally wrong interpretation as the cliff effect itself. These cases also cancel out the large changes in the other direction, making the overall drop for these groups smaller.
For example, here is the proportion of curves where the change in confidence is δ < −0.2 (the average drop over all viz and expertise groups was estimated as 0.2):
data %>% group_by(id, viz, expertise) %>%
summarize(difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
group_by(viz, expertise) %>%
summarise(
negative_cliff = round(mean(difference < -0.2), 2))
## `summarise()` regrouping output by 'id', 'viz' (override with `.groups` argument)
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## # A tibble: 16 x 3
## # Groups: viz [4]
## viz expertise negative_cliff
## <fct> <fct> <dbl>
## 1 p Stats/ML 0
## 2 p VIS/HCI 0.09
## 3 p Social science and humanities 0
## 4 p Physical and life sciences 0
## 5 CI Stats/ML 0
## 6 CI VIS/HCI 0.03
## 7 CI Social science and humanities 0
## 8 CI Physical and life sciences 0
## 9 gradient Stats/ML 0
## 10 gradient VIS/HCI 0.09
## 11 gradient Social science and humanities 0.12
## 12 gradient Physical and life sciences 0
## 13 violin Stats/ML 0
## 14 violin VIS/HCI 0.03
## 15 violin Social science and humanities 0.03
## 16 violin Physical and life sciences 0
data %>% group_by(id, viz, expertise) %>%
summarize(difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
group_by(expertise) %>%
summarise(
negative_cliff = round(mean(difference < -0.2), 2))
## `summarise()` regrouping output by 'id', 'viz' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 2
## expertise negative_cliff
## <fct> <dbl>
## 1 Stats/ML 0
## 2 VIS/HCI 0.06
## 3 Social science and humanities 0.04
## 4 Physical and life sciences 0
data %>% group_by(id, viz, expertise) %>%
summarize(difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
group_by(viz) %>%
summarise(
negative_cliff = round(mean(difference < -0.2), 2))
## `summarise()` regrouping output by 'id', 'viz' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 2
## viz negative_cliff
## <fct> <dbl>
## 1 p 0.03
## 2 CI 0.01
## 3 gradient 0.06
## 4 violin 0.02
Finally, we test how the results depend on those confidence curves which have a clearly positive slope or large increases in confidence as the p-value increases.
We fit a simple linear model with the logit-transformed p-value as predictor and the trimmed, logit-transformed confidence as response, and check whether the corresponding coefficient is clearly positive (the threshold was arbitrarily chosen as 0.1). In addition, we remove those curves where the difference between two consecutive confidence values is larger than 0.2 (this difference should of course be negative):
data <- readRDS("experiment1/data/exp1_data.rds")
outliers_slope <- data %>% group_by(id, viz) %>%
mutate(p_logit = qlogis(p), c_logit = qlogis(
ifelse(confidence == 0, 0.001, ifelse(confidence == 1, 0.999, confidence)))) %>%
summarize(
slope = coef(lm(c_logit ~ p_logit))[2]) %>%
filter(slope > 0.1)
outliers_diff <- data %>% group_by(id, viz) %>%
arrange(p, .by_group = TRUE) %>%
mutate(diff = c(0,diff(confidence)), neg_diff = any(diff > 0.2)) %>%
filter(neg_diff)
data_cleaned <- data %>%
filter(!(interaction(id,viz) %in%
interaction(outliers_slope$id, outliers_slope$viz))) %>%
filter(!(interaction(id,viz) %in%
interaction(outliers_diff$id, outliers_diff$viz)))
data_cleaned <- data_cleaned %>%
mutate(
logit_p = qlogis(p),
p_lt0.05 = factor(p < 0.05, levels = c(TRUE, FALSE), labels = c("Yes", "No")),
p_eq0.05 = factor(p == 0.05, levels = c(TRUE, FALSE), labels = c("Yes", "No")),
cat_p = recode_factor(true_p,
"0.06" = ">0.05", "0.1" = ">0.05", "0.5" = ">0.05", "0.8" = ">0.05",
.ordered = TRUE))
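As a quick sanity check (a sketch, not part of the original analysis), we can count how many id-viz confidence curves the two filters removed:
# number of id-viz curves dropped by the slope and consecutive-increase filters
nrow(distinct(data, id, viz)) - nrow(distinct(data_cleaned, id, viz))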
fit_exp1 <- brm(bf(
confidence ~
viz * p_lt0.05 * logit_p +
viz * p_eq0.05 +
(viz + p_lt0.05 * logit_p + p_eq0.05 | id),
zoi ~
viz * true_p + (viz | id),
coi ~ mo(cat_p),
sigma ~ viz + (1 | id)),
data = data_cleaned,
family = logit_p_gaussian,
stanvars = stanvar(scode = stan_funs, block = "functions"),
chains = 4, cores = 4, iter = 2000, init = 0,
save_warmup = FALSE, refresh = 0)
comb_exp1 <- fit_exp1$data %>%
data_grid(viz, logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %>%
filter(interaction(logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %in%
unique(interaction(
fit_exp1$data$logit_p, fit_exp1$data$p_lt0.05,
fit_exp1$data$p_eq0.05, fit_exp1$data$cat_p,
fit_exp1$data$true_p)))
f_mu_exp1 <- posterior_epred(fit_exp1, newdata = comb_exp1, re_formula = NA)
d <- data.frame(value = c(f_mu_exp1),
p = rep(comb_exp1$true_p, each = nrow(f_mu_exp1)),
viz = rep(comb_exp1$viz, each = nrow(f_mu_exp1)),
iter = 1:nrow(f_mu_exp1))
levels(d$viz) <- c("Textual", "Classic CI", "Gradient CI", "Violin CI")
sumr <- d %>% group_by(viz, p) %>%
summarise(Estimate = mean(value),
Q2.5 = quantile(value, 0.025),
Q97.5 = quantile(value, 0.975)) %>%
mutate(p = as.numeric(levels(p))[p])
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
cols <- c("Textual" = "#D55E00", "Classic CI" = "#0072B2",
"Gradient CI" = "#009E73", "Violin CI" = "#CC79A7")
x_ticks <- c(0.001, 0.01, 0.04, 0.06, 0.1, 0.5, 0.8)
y_ticks <- c(0.05, seq(0.1, 0.9, by = 0.1), 0.95)
dodge <- 0.19
p1 <- sumr %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14,
margin = margin(t = -0.1, r = 0, b = -0.1, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14,
margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p2 <- sumr %>% filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p <- p1 + coord_cartesian(xlim = c(0.001, 0.9), ylim = c(0.045, 0.95)) +
annotation_custom(
ggplotGrob(p2),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p
d %>% group_by(viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
summarise(mean = mean(difference), sd = sd(difference),
"2.5%" = quantile(difference, 0.025),
"97.5" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 5
## viz mean sd `2.5%` `97.5`
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Textual 0.260 0.0264 0.208 0.313
## 2 Classic CI 0.316 0.0250 0.267 0.367
## 3 Gradient CI 0.166 0.0248 0.118 0.215
## 4 Violin CI 0.175 0.0230 0.130 0.221
Overall, the results are in line with the analysis of the full data, except that the average δ is larger in all groups.
Now we focus on analysing the subjective rankings of the representation techniques. We read the feedback data and merge it with the previous data, which contains the expertise information:
files <- list.files(path, pattern = "subjective", full.names = TRUE)
n <- length(files)
rankdata <- data.frame(id = rep(1:n, each=4),
viz = factor(rep(c("p", "ci", "violin", "gradient")),
levels=c("p", "ci", "violin", "gradient")),
rank = factor(NA, levels=1:4))
for(i in 1:n) {
fb <- fromJSON(files[i])
# extract the participant id from the file name (the part between "subjective" and ".txt")
rankdata$id[4*(i-1) + 1:4] <- strsplit(strsplit(files[i], "subjective")[[1]], ".txt")[[2]]
rankdata$rank[4*(i-1) + 1:4] <- factor(fb$rank)
}
rankdata$viz <- recode_factor(rankdata$viz, "p" = "p", "ci" = "CI",
"gradient" = "gradient", "violin" = "violin")
rankdata$rank <- factor(rankdata$rank, ordered = TRUE)
rankdata$id <- factor(rankdata$id, levels = levels(data$id))
ranks_exp1 <- distinct(inner_join(rankdata, data[,c("id", "viz", "expertise")]))
For analysing the subjective rankings of the representation styles, we use a Bayesian ordinal regression model. We test two models, one with expertise and another without it:
fit_rank1 <- brm(rank ~ viz + (1 | id), family=cumulative,
data = ranks_exp1, refresh = 0)
saveRDS(fit_rank1, file = "experiment1/results/fit_rank1.rds")
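For completeness, a sketch of the second model mentioned above, with expertise as a predictor (this structure is an assumption; the figure below uses the model without expertise):
fit_rank2 <- brm(rank ~ viz * expertise + (1 | id), family = cumulative,
                 data = ranks_exp1, refresh = 0)
# the two models could then be compared with, for example, loo(fit_rank1, fit_rank2)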
Plot ranking probabilities:
colsrank <- scales::brewer_pal(palette = "PiYG",
direction = -1)(4)
effects_exp1 <- conditional_effects(fit_rank1, effects = "viz",
plot = FALSE, categorical = TRUE, re_formula = NA)
p <- ggplot(effects_exp1[[1]], aes(x = viz, y = estimate__, colour = cats__)) +
geom_point(position=position_dodge(0.5)) +
geom_errorbar(width=0.25, aes(ymin=lower__, ymax = upper__),
position = position_dodge(0.5)) +
theme_classic() +
ylab("Ranking \n probability") +
xlab("Representation") +
scale_x_discrete(labels =c("Textual", "Classic CI", "Gradient CI", "Violin CI")) +
scale_color_manual("Rank",
values = colsrank,
labels = c("1 (best)", "2", "3", "4 (worst)")) +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12,hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.text = element_text(size = 14))
p
We see that the textual p-value representation is likely to be ranked very low, while the violin CI and classic CI are the most preferred options, and the gradient CI seems to divide opinions the most.
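As an additional illustration (a sketch, not part of the original analysis), the posterior probability that one representation obtains a better (smaller) expected rank than another for an average participant can be computed from fit_rank1:
newd <- data.frame(viz = factor(c("p", "CI", "gradient", "violin"),
                                levels = levels(ranks_exp1$viz)))
# posterior category probabilities: draws x representation x rank
pr <- posterior_epred(fit_rank1, newdata = newd, re_formula = NA)
# expected rank per draw and representation (smaller is better)
erank <- apply(pr, 1:2, function(x) sum(1:4 * x))
colnames(erank) <- as.character(newd$viz)
mean(erank[, "violin"] < erank[, "p"]) # P(violin CI preferred over textual p-value)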
Let us turn our attention to the second experiment, for which we essentially use the same workflow as for the first experiment.
path <- "experiment2/data"
answers <- list.files(path, pattern="answers", full.names = TRUE)
n <- length(answers)
# create a data frame for the results
data_raw <- data.frame(id = rep(1:n, each = 32), viz = NA, replication = NA, value = NA,
expertise = NA, degree = NA, age = NA, experience = NA, tools = NA)
# read in answers, not optimal way will do
for(i in 1:n){
x <- strsplit(fromJSON(answers[i]), ",")
dem <- fromJSON(paste0(path, "/demography", x[[1]][1], ".txt"))
for(j in 1:32) {
data_raw[32*(i-1) + j, c("id", "viz", "replication", "value")] <- x[[j]]
data_raw[32*(i-1) + j, c("expertise", "degree", "age", "experience", "tools")] <-
dem[c("expertise", "level", "age", "experience", "tools")]
}
}
saveRDS(data_raw, file = "experiment2/data/data_raw.rds")
# remove person who didn't answer on the demography part
data_raw <- data_raw[data_raw$expertise != "",]
true_p <- c(0.001, 0.01, 0.04, 0.05, 0.06, 0.1, 0.5, 0.8)
data2 <- data_raw %>% mutate(n = factor(ifelse(as.numeric(id) %% 8 < 4, 50, 200)),
id = factor(id),
viz = relevel(factor(viz, labels = c("CI",
"Gradient",
"Continuous Violin",
"Discrete Violin")),
"CI"),
replication = as.numeric(replication),
value = as.numeric(value),
p = true_p[replication],
true_p = factor(p), # for monotonic but non-linear effect on confidence
confidence = (value - 1) / 99,
expertise = factor(expertise)) %>% arrange(id, viz)
# Classify the expertise
data2$expertise <- recode_factor(data2$expertise,
"Statistics" = "Stats/ML",
"machine learning, statistics" = "Stats/ML",
"Human Factors, experiment design" = "Stats/ML",
"Consulting" = "Stats/ML",
"Computer vision" = "Stats/ML",
"Meta-research" = "Stats/ML",
"Epidemiology" = "Stats/ML",
"infovis" = "VIS/HCI",
"HCI and VIS" = "VIS/HCI",
"HCI" = "VIS/HCI",
"vis" = "VIS/HCI",
"Vis and HCI" = "VIS/HCI",
"Visualisation" = "VIS/HCI",
"Visualization" = "VIS/HCI",
"sociology" = "Social science and humanities",
"Sociology" = "Social science and humanities",
"Psychology" = "Social science and humanities",
"psychology" = "Social science and humanities",
"health economics" = "Social science and humanities",
"Sport psychology" = "Social science and humanities",
"economics" = "Social science and humanities",
"Psychology / Neuroscience" = "Physical and life sciences",
"Ecology" = "Physical and life sciences",
"Biology" = "Physical and life sciences",
"Biology " = "Physical and life sciences",
"Developmental Biology" = "Physical and life sciences",
"Microbiology" = "Physical and life sciences"
)
data2$expertise <- relevel(data2$expertise, "Stats/ML")
As in the first experiment, we first look at some descriptive statistics.
ids <- which(!duplicated(data2$id))
barplot(table(data2$expertise[ids]))
barplot(table(data2$degree[ids]))
hist(as.numeric(data2$age[ids]))
Again we focus on the cliff effect, defined as the difference in confidence when the p-value is 0.04 versus 0.06:
data2 %>% group_by(id, viz) %>%
summarize(difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
group_by(viz) %>%
summarise(
mean = mean(difference),
median = median(difference),
sd = sd(difference),
se = sd(difference) / sqrt(length(difference)),
"2.5%" = quantile(difference, 0.025),
"97.5%" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'id' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 7
## viz mean median sd se `2.5%` `97.5%`
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CI 0.0740 0.0606 0.123 0.0201 -0.220 0.284
## 2 Gradient 0.00928 0 0.124 0.0203 -0.207 0.252
## 3 Continuous Violin 0.0131 0 0.0850 0.0140 -0.154 0.173
## 4 Discrete Violin 0.0633 0.0404 0.180 0.0295 -0.168 0.501
data2 %>% group_by(id, viz) %>%
summarize(difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
ggplot(aes(x = viz, y = difference)) +
geom_violin() +
geom_point(alpha = 0.5, position = position_jitter(0.1)) +
scale_y_continuous("Difference in confidence when p-value is 0.06 vs 0.04") +
scale_x_discrete("Representation") +
theme_classic()
## `summarise()` regrouping output by 'id' (override with `.groups` argument)
Interestingly, while the cliff effect is again largest with the classic CI, there are some cases where the discrete violin CI has led to a very large drop in confidence. Overall, the cliff effect seems to be much smaller than in the one-sample case (there the average drop was around 0.1-0.3 depending on the technique).
Now the same, but subgrouped by sample size:
data2 %>% group_by(id, viz, n) %>%
summarize(diff = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
group_by(viz, n) %>%
summarise(
mean = mean(diff),
sd = sd(diff),
se = sd(diff) / sqrt(length(diff)),
"2.5%" = quantile(diff, 0.025),
"97.5%" = quantile(diff, 0.975))
## `summarise()` regrouping output by 'id', 'viz' (override with `.groups` argument)
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## # A tibble: 8 x 7
## # Groups: viz [4]
## viz n mean sd se `2.5%` `97.5%`
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CI 50 8.44e- 2 0.107 0.0260 -0.0323 0.327
## 2 CI 200 6.52e- 2 0.136 0.0305 -0.255 0.239
## 3 Gradient 50 4.70e-18 0.115 0.0279 -0.216 0.152
## 4 Gradient 200 1.72e- 2 0.133 0.0297 -0.183 0.290
## 5 Continuous Violin 50 1.19e- 3 0.0730 0.0177 -0.113 0.143
## 6 Continuous Violin 200 2.32e- 2 0.0946 0.0212 -0.162 0.158
## 7 Discrete Violin 50 1.06e- 1 0.179 0.0433 -0.0303 0.570
## 8 Discrete Violin 200 2.73e- 2 0.177 0.0396 -0.236 0.437
and by expertise:
data2 %>% group_by(id, viz, expertise) %>%
summarize(diff = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
group_by(viz, expertise) %>%
summarise(
mean = mean(diff),
sd = sd(diff),
se = sd(diff) / sqrt(length(diff)),
"2.5%" = quantile(diff, 0.025),
"97.5%" = quantile(diff, 0.975))
## `summarise()` regrouping output by 'id', 'viz' (override with `.groups` argument)
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## # A tibble: 16 x 7
## # Groups: viz [4]
## viz expertise mean sd se `2.5%` `97.5%`
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CI Stats/ML 0.0909 0.0674 0.0225 -0.00404 0.192
## 2 CI VIS/HCI 0.0530 0.130 0.0460 -0.179 0.182
## 3 CI Social science and hu~ 0.0743 0.0874 0.0234 -0.0338 0.226
## 4 CI Physical and life sci~ 0.0758 0.237 0.0966 -0.258 0.370
## 5 Gradient Stats/ML -0.0281 0.131 0.0437 -0.234 0.0909
## 6 Gradient VIS/HCI 0.00126 0.152 0.0538 -0.193 0.223
## 7 Gradient Social science and hu~ 0.0310 0.127 0.0339 -0.145 0.274
## 8 Gradient Physical and life sci~ 0.0253 0.0641 0.0262 -0.0631 0.0985
## 9 Continuous Vi~ Stats/ML 0.0258 0.0804 0.0268 -0.107 0.125
## 10 Continuous Vi~ VIS/HCI 0.00758 0.130 0.0460 -0.168 0.180
## 11 Continuous Vi~ Social science and hu~ 0.00433 0.0730 0.0195 -0.0977 0.121
## 12 Continuous Vi~ Physical and life sci~ 0.0219 0.0583 0.0238 -0.0354 0.119
## 13 Discrete Viol~ Stats/ML -0.00449 0.0639 0.0213 -0.113 0.0566
## 14 Discrete Viol~ VIS/HCI 0.115 0.313 0.111 -0.274 0.618
## 15 Discrete Viol~ Social science and hu~ 0.0722 0.137 0.0366 -0.115 0.359
## 16 Discrete Viol~ Physical and life sci~ 0.0758 0.165 0.0674 -0.0947 0.347
data2 %>% group_by(id, viz,expertise) %>%
summarize(
difference = confidence[true_p==0.04] - confidence[true_p==0.06]) %>%
ggplot(aes(x=viz, y = difference)) + geom_violin() + theme_classic() +
scale_y_continuous("Difference in confidence when p-value is 0.04 vs 0.06") +
scale_x_discrete("Representation") +
geom_point(aes(colour = expertise), position=position_jitter(0.1))
## `summarise()` regrouping output by 'id', 'viz' (override with `.groups` argument)
It is difficult to say anything definite, but there do not seem to be clear differences between sample sizes or expertise groups, although again it is the VIS/HCI group that can be “blamed” for the extreme changes in the violin CI cases.
Let’s check how many extreme answers (full or zero confidence) there are in the different groups:
data2 %>% group_by(id, viz, n) %>%
mutate(extreme = confidence %in% c(0, 1)) %>%
group_by(viz, n) %>%
summarise(
mean = mean(extreme),
sd = sd(extreme),
se = sd(extreme) / sqrt(length(extreme)))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## # A tibble: 8 x 5
## # Groups: viz [4]
## viz n mean sd se
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 CI 50 0.125 0.332 0.0285
## 2 CI 200 0.156 0.364 0.0288
## 3 Gradient 50 0.169 0.376 0.0323
## 4 Gradient 200 0.169 0.376 0.0297
## 5 Continuous Violin 50 0.140 0.348 0.0298
## 6 Continuous Violin 200 0.162 0.370 0.0293
## 7 Discrete Violin 50 0.147 0.355 0.0305
## 8 Discrete Violin 200 0.119 0.325 0.0257
data2 %>% group_by(id, viz, expertise) %>%
mutate(extreme = confidence %in% c(0, 1)) %>%
group_by(viz, expertise) %>%
summarise(
mean = mean(extreme),
sd = sd(extreme),
se = sd(extreme) / sqrt(length(extreme)))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## # A tibble: 16 x 5
## # Groups: viz [4]
## viz expertise mean sd se
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 CI Stats/ML 0.0556 0.231 0.0272
## 2 CI VIS/HCI 0.125 0.333 0.0417
## 3 CI Social science and humanities 0.205 0.406 0.0383
## 4 CI Physical and life sciences 0.146 0.357 0.0515
## 5 Gradient Stats/ML 0.0278 0.165 0.0195
## 6 Gradient VIS/HCI 0.203 0.406 0.0507
## 7 Gradient Social science and humanities 0.205 0.406 0.0383
## 8 Gradient Physical and life sciences 0.25 0.438 0.0632
## 9 Continuous Violin Stats/ML 0.0417 0.201 0.0237
## 10 Continuous Violin VIS/HCI 0.172 0.380 0.0475
## 11 Continuous Violin Social science and humanities 0.196 0.399 0.0377
## 12 Continuous Violin Physical and life sciences 0.188 0.394 0.0569
## 13 Discrete Violin Stats/ML 0.0556 0.231 0.0272
## 14 Discrete Violin VIS/HCI 0.0938 0.294 0.0367
## 15 Discrete Violin Social science and humanities 0.188 0.392 0.0370
## 16 Discrete Violin Physical and life sciences 0.167 0.377 0.0544
Compared to the first experiment, here the Stats/ML group performs best (it has the lowest proportion of extreme answers).
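For reference, a quick overall comparison of the proportion of extreme answers between the two experiments (a sketch using the experiment 1 data frame data loaded earlier):
mean(data$confidence %in% c(0, 1))
mean(data2$confidence %in% c(0, 1))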
Again, create some additional variables:
data2 <- data2 %>%
mutate(
logit_p = qlogis(p),
p_lt0.05 = factor(p < 0.05, levels = c(TRUE, FALSE), labels = c("Yes", "No")),
p_eq0.05 = factor(p == 0.05, levels = c(TRUE, FALSE), labels = c("Yes", "No")),
cat_p = recode_factor(true_p, "0.06" = ">0.05", "0.1" = ">0.05", "0.5" = ">0.05", "0.8" = ">0.05",
.ordered = TRUE))
And fit the same models as in the first experiment:
fit_exp2 <- brm(bf(
confidence ~
viz * p_lt0.05 * logit_p +
viz * p_eq0.05 +
(viz + p_lt0.05 * logit_p + p_eq0.05 | id),
zoi ~
viz * true_p + (viz | id),
coi ~ mo(cat_p),
sigma ~ viz + (1 | id)),
data = data2,
family = logit_p_gaussian,
stanvars = stanvar(scode = stan_funs, block = "functions"),
chains = 4, cores = 4, iter = 2000, init = 0,
save_warmup = FALSE, save_all_pars = TRUE, refresh = 0)
And the same model with expertise as a predictor:
fit_expertise <- brm(bf(
confidence ~
expertise * viz * p_lt0.05 * logit_p +
expertise * viz * p_eq0.05 +
(viz + p_lt0.05 * logit_p + p_eq0.05 | id),
zoi ~
expertise * viz + expertise * true_p + viz * true_p + (viz | id),
coi ~ mo(cat_p),
sigma ~ expertise * viz + (1 | id)),
data = data2,
family = logit_p_gaussian,
stanvars = stanvar(scode = stan_funs, block = "functions"),
chains = 4, cores = 4, iter = 2000, init = 0,
save_warmup = FALSE, save_all_pars = TRUE, refresh = 0)
fit_exp2
## Family: logit_p_gaussian
## Links: mu = identity; sigma = log; zoi = logit; coi = logit
## Formula: confidence ~ viz * p_lt0.05 * logit_p + viz * p_eq0.05 + (viz + p_lt0.05 * logit_p + p_eq0.05 | id)
## zoi ~ viz * true_p + (viz | id)
## coi ~ mo(cat_p)
## sigma ~ viz + (1 | id)
## Data: data (Number of observations: 1184)
## Samples: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~id (Number of levels: 37)
## Estimate Est.Error l-95% CI
## sd(Intercept) 1.30 0.20 0.92
## sd(vizGradient) 0.62 0.12 0.42
## sd(vizContinuousViolin) 0.65 0.12 0.44
## sd(vizDiscreteViolin) 0.89 0.16 0.62
## sd(p_lt0.05No) 0.84 0.19 0.48
## sd(logit_p) 0.23 0.04 0.17
## sd(p_eq0.05No) 0.14 0.09 0.01
## sd(p_lt0.05No:logit_p) 0.33 0.06 0.22
## sd(zoi_Intercept) 3.87 0.73 2.66
## sd(zoi_vizGradient) 1.04 0.82 0.05
## sd(zoi_vizContinuousViolin) 0.61 0.44 0.03
## sd(zoi_vizDiscreteViolin) 0.83 0.60 0.04
## sd(sigma_Intercept) 0.28 0.05 0.20
## cor(Intercept,vizGradient) -0.08 0.18 -0.42
## cor(Intercept,vizContinuousViolin) 0.14 0.18 -0.21
## cor(vizGradient,vizContinuousViolin) 0.36 0.19 -0.04
## cor(Intercept,vizDiscreteViolin) -0.06 0.18 -0.39
## cor(vizGradient,vizDiscreteViolin) 0.11 0.21 -0.31
## cor(vizContinuousViolin,vizDiscreteViolin) 0.33 0.19 -0.07
## cor(Intercept,p_lt0.05No) -0.80 0.11 -0.93
## cor(vizGradient,p_lt0.05No) -0.05 0.20 -0.45
## cor(vizContinuousViolin,p_lt0.05No) -0.18 0.20 -0.56
## cor(vizDiscreteViolin,p_lt0.05No) 0.06 0.20 -0.34
## cor(Intercept,logit_p) 0.56 0.14 0.25
## cor(vizGradient,logit_p) 0.04 0.20 -0.35
## cor(vizContinuousViolin,logit_p) -0.03 0.19 -0.41
## cor(vizDiscreteViolin,logit_p) -0.21 0.18 -0.55
## cor(p_lt0.05No,logit_p) -0.41 0.19 -0.71
## cor(Intercept,p_eq0.05No) 0.17 0.30 -0.45
## cor(vizGradient,p_eq0.05No) 0.06 0.31 -0.56
## cor(vizContinuousViolin,p_eq0.05No) 0.22 0.31 -0.44
## cor(vizDiscreteViolin,p_eq0.05No) 0.04 0.31 -0.57
## cor(p_lt0.05No,p_eq0.05No) -0.22 0.31 -0.75
## cor(logit_p,p_eq0.05No) 0.14 0.31 -0.51
## cor(Intercept,p_lt0.05No:logit_p) -0.80 0.09 -0.94
## cor(vizGradient,p_lt0.05No:logit_p) -0.06 0.18 -0.41
## cor(vizContinuousViolin,p_lt0.05No:logit_p) -0.19 0.18 -0.53
## cor(vizDiscreteViolin,p_lt0.05No:logit_p) 0.10 0.18 -0.26
## cor(p_lt0.05No,p_lt0.05No:logit_p) 0.90 0.07 0.71
## cor(logit_p,p_lt0.05No:logit_p) -0.47 0.17 -0.74
## cor(p_eq0.05No,p_lt0.05No:logit_p) -0.26 0.31 -0.77
## cor(zoi_Intercept,zoi_vizGradient) 0.38 0.42 -0.61
## cor(zoi_Intercept,zoi_vizContinuousViolin) -0.09 0.45 -0.85
## cor(zoi_vizGradient,zoi_vizContinuousViolin) 0.12 0.44 -0.75
## cor(zoi_Intercept,zoi_vizDiscreteViolin) 0.03 0.44 -0.78
## cor(zoi_vizGradient,zoi_vizDiscreteViolin) 0.15 0.44 -0.72
## cor(zoi_vizContinuousViolin,zoi_vizDiscreteViolin) 0.05 0.44 -0.80
## u-95% CI Rhat Bulk_ESS
## sd(Intercept) 1.73 1.00 1805
## sd(vizGradient) 0.88 1.00 2322
## sd(vizContinuousViolin) 0.90 1.00 2399
## sd(vizDiscreteViolin) 1.25 1.00 1793
## sd(p_lt0.05No) 1.23 1.00 1425
## sd(logit_p) 0.31 1.00 2093
## sd(p_eq0.05No) 0.34 1.00 2145
## sd(p_lt0.05No:logit_p) 0.45 1.00 1413
## sd(zoi_Intercept) 5.50 1.00 1532
## sd(zoi_vizGradient) 3.08 1.00 1851
## sd(zoi_vizContinuousViolin) 1.63 1.00 2570
## sd(zoi_vizDiscreteViolin) 2.25 1.00 1908
## sd(sigma_Intercept) 0.39 1.00 2094
## cor(Intercept,vizGradient) 0.28 1.00 2416
## cor(Intercept,vizContinuousViolin) 0.49 1.00 2325
## cor(vizGradient,vizContinuousViolin) 0.69 1.00 2134
## cor(Intercept,vizDiscreteViolin) 0.30 1.01 1736
## cor(vizGradient,vizDiscreteViolin) 0.49 1.00 1463
## cor(vizContinuousViolin,vizDiscreteViolin) 0.66 1.00 1492
## cor(Intercept,p_lt0.05No) -0.54 1.00 2439
## cor(vizGradient,p_lt0.05No) 0.35 1.00 2658
## cor(vizContinuousViolin,p_lt0.05No) 0.22 1.00 2446
## cor(vizDiscreteViolin,p_lt0.05No) 0.45 1.00 2376
## cor(Intercept,logit_p) 0.79 1.00 2000
## cor(vizGradient,logit_p) 0.41 1.00 1805
## cor(vizContinuousViolin,logit_p) 0.34 1.00 2078
## cor(vizDiscreteViolin,logit_p) 0.16 1.00 2467
## cor(p_lt0.05No,logit_p) 0.00 1.00 1516
## cor(Intercept,p_eq0.05No) 0.71 1.00 6954
## cor(vizGradient,p_eq0.05No) 0.63 1.00 6189
## cor(vizContinuousViolin,p_eq0.05No) 0.75 1.00 4951
## cor(vizDiscreteViolin,p_eq0.05No) 0.62 1.00 6389
## cor(p_lt0.05No,p_eq0.05No) 0.44 1.00 6094
## cor(logit_p,p_eq0.05No) 0.68 1.00 5273
## cor(Intercept,p_lt0.05No:logit_p) -0.57 1.00 2162
## cor(vizGradient,p_lt0.05No:logit_p) 0.31 1.00 3085
## cor(vizContinuousViolin,p_lt0.05No:logit_p) 0.18 1.00 2763
## cor(vizDiscreteViolin,p_lt0.05No:logit_p) 0.44 1.00 2717
## cor(p_lt0.05No,p_lt0.05No:logit_p) 0.98 1.00 1838
## cor(logit_p,p_lt0.05No:logit_p) -0.11 1.00 1984
## cor(p_eq0.05No,p_lt0.05No:logit_p) 0.41 1.00 3754
## cor(zoi_Intercept,zoi_vizGradient) 0.94 1.00 4286
## cor(zoi_Intercept,zoi_vizContinuousViolin) 0.76 1.00 7473
## cor(zoi_vizGradient,zoi_vizContinuousViolin) 0.85 1.00 4938
## cor(zoi_Intercept,zoi_vizDiscreteViolin) 0.80 1.00 6398
## cor(zoi_vizGradient,zoi_vizDiscreteViolin) 0.87 1.00 3762
## cor(zoi_vizContinuousViolin,zoi_vizDiscreteViolin) 0.82 1.00 4427
## Tail_ESS
## sd(Intercept) 3076
## sd(vizGradient) 4008
## sd(vizContinuousViolin) 3786
## sd(vizDiscreteViolin) 3229
## sd(p_lt0.05No) 2064
## sd(logit_p) 3479
## sd(p_eq0.05No) 2986
## sd(p_lt0.05No:logit_p) 2073
## sd(zoi_Intercept) 3131
## sd(zoi_vizGradient) 2832
## sd(zoi_vizContinuousViolin) 4035
## sd(zoi_vizDiscreteViolin) 2572
## sd(sigma_Intercept) 4326
## cor(Intercept,vizGradient) 3788
## cor(Intercept,vizContinuousViolin) 4054
## cor(vizGradient,vizContinuousViolin) 3928
## cor(Intercept,vizDiscreteViolin) 3320
## cor(vizGradient,vizDiscreteViolin) 2687
## cor(vizContinuousViolin,vizDiscreteViolin) 3297
## cor(Intercept,p_lt0.05No) 3648
## cor(vizGradient,p_lt0.05No) 3921
## cor(vizContinuousViolin,p_lt0.05No) 4104
## cor(vizDiscreteViolin,p_lt0.05No) 4132
## cor(Intercept,logit_p) 3472
## cor(vizGradient,logit_p) 3884
## cor(vizContinuousViolin,logit_p) 3301
## cor(vizDiscreteViolin,logit_p) 4093
## cor(p_lt0.05No,logit_p) 3018
## cor(Intercept,p_eq0.05No) 5579
## cor(vizGradient,p_eq0.05No) 5290
## cor(vizContinuousViolin,p_eq0.05No) 6162
## cor(vizDiscreteViolin,p_eq0.05No) 6208
## cor(p_lt0.05No,p_eq0.05No) 5488
## cor(logit_p,p_eq0.05No) 5523
## cor(Intercept,p_lt0.05No:logit_p) 3956
## cor(vizGradient,p_lt0.05No:logit_p) 4880
## cor(vizContinuousViolin,p_lt0.05No:logit_p) 4920
## cor(vizDiscreteViolin,p_lt0.05No:logit_p) 5098
## cor(p_lt0.05No,p_lt0.05No:logit_p) 2917
## cor(logit_p,p_lt0.05No:logit_p) 3757
## cor(p_eq0.05No,p_lt0.05No:logit_p) 4678
## cor(zoi_Intercept,zoi_vizGradient) 5014
## cor(zoi_Intercept,zoi_vizContinuousViolin) 5422
## cor(zoi_vizGradient,zoi_vizContinuousViolin) 4880
## cor(zoi_Intercept,zoi_vizDiscreteViolin) 5182
## cor(zoi_vizGradient,zoi_vizDiscreteViolin) 5348
## cor(zoi_vizContinuousViolin,zoi_vizDiscreteViolin) 5925
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -2.29 0.37 -3.01 -1.58
## sigma_Intercept -0.28 0.07 -0.42 -0.13
## zoi_Intercept -3.01 0.97 -5.00 -1.19
## coi_Intercept -2.46 0.34 -3.19 -1.82
## vizGradient 0.10 0.39 -0.67 0.85
## vizContinuousViolin -0.01 0.41 -0.81 0.80
## vizDiscreteViolin 0.98 0.43 0.14 1.84
## p_lt0.05No -0.30 0.30 -0.91 0.29
## logit_p -0.66 0.06 -0.79 -0.53
## p_eq0.05No -0.13 0.16 -0.44 0.19
## vizGradient:p_lt0.05No -0.40 0.33 -1.04 0.25
## vizContinuousViolin:p_lt0.05No -0.12 0.35 -0.82 0.58
## vizDiscreteViolin:p_lt0.05No -0.72 0.36 -1.43 -0.01
## vizGradient:logit_p 0.18 0.06 0.06 0.31
## vizContinuousViolin:logit_p 0.12 0.07 -0.02 0.25
## vizDiscreteViolin:logit_p 0.17 0.07 0.04 0.30
## p_lt0.05No:logit_p -0.03 0.09 -0.21 0.14
## vizGradient:p_eq0.05No 0.30 0.20 -0.10 0.69
## vizContinuousViolin:p_eq0.05No 0.32 0.22 -0.11 0.74
## vizDiscreteViolin:p_eq0.05No 0.10 0.22 -0.33 0.54
## vizGradient:p_lt0.05No:logit_p -0.24 0.08 -0.40 -0.07
## vizContinuousViolin:p_lt0.05No:logit_p -0.25 0.09 -0.42 -0.06
## vizDiscreteViolin:p_lt0.05No:logit_p -0.30 0.09 -0.48 -0.13
## sigma_vizGradient -0.22 0.08 -0.37 -0.06
## sigma_vizContinuousViolin -0.02 0.08 -0.19 0.14
## sigma_vizDiscreteViolin -0.01 0.08 -0.17 0.16
## zoi_vizGradient -0.35 1.07 -2.55 1.64
## zoi_vizContinuousViolin 0.03 0.95 -1.83 1.89
## zoi_vizDiscreteViolin -0.08 1.00 -2.13 1.84
## zoi_true_p0.01 -6.70 2.39 -11.95 -2.55
## zoi_true_p0.04 -3.71 1.70 -7.44 -0.74
## zoi_true_p0.05 -3.67 1.63 -7.09 -0.72
## zoi_true_p0.06 -2.01 1.28 -4.70 0.28
## zoi_true_p0.1 -3.70 1.70 -7.34 -0.70
## zoi_true_p0.5 0.77 0.88 -0.94 2.54
## zoi_true_p0.8 3.31 0.85 1.71 5.06
## zoi_vizGradient:true_p0.01 6.18 2.60 1.53 11.81
## zoi_vizContinuousViolin:true_p0.01 6.21 2.57 1.66 11.67
## zoi_vizDiscreteViolin:true_p0.01 5.56 2.62 0.77 11.20
## zoi_vizGradient:true_p0.04 1.40 2.18 -2.81 5.88
## zoi_vizContinuousViolin:true_p0.04 0.05 2.34 -4.58 4.69
## zoi_vizDiscreteViolin:true_p0.04 -0.12 2.37 -4.79 4.57
## zoi_vizGradient:true_p0.05 1.36 2.14 -2.81 5.64
## zoi_vizContinuousViolin:true_p0.05 1.65 2.05 -2.39 5.72
## zoi_vizDiscreteViolin:true_p0.05 -0.18 2.31 -4.74 4.35
## zoi_vizGradient:true_p0.06 -0.33 1.89 -4.22 3.31
## zoi_vizContinuousViolin:true_p0.06 -1.69 2.09 -5.94 2.28
## zoi_vizDiscreteViolin:true_p0.06 -1.82 2.13 -6.24 2.16
## zoi_vizGradient:true_p0.1 2.46 2.03 -1.41 6.61
## zoi_vizContinuousViolin:true_p0.1 -0.01 2.32 -4.47 4.62
## zoi_vizDiscreteViolin:true_p0.1 -0.14 2.36 -4.80 4.49
## zoi_vizGradient:true_p0.5 0.05 1.28 -2.47 2.57
## zoi_vizContinuousViolin:true_p0.5 -0.05 1.23 -2.46 2.38
## zoi_vizDiscreteViolin:true_p0.5 -0.78 1.29 -3.31 1.76
## zoi_vizGradient:true_p0.8 0.42 1.28 -2.00 3.00
## zoi_vizContinuousViolin:true_p0.8 -0.28 1.17 -2.54 2.03
## zoi_vizDiscreteViolin:true_p0.8 -0.65 1.20 -2.97 1.73
## coi_mocat_p 0.92 0.13 0.69 1.19
## Rhat Bulk_ESS Tail_ESS
## Intercept 1.00 1289 2927
## sigma_Intercept 1.00 2694 4261
## zoi_Intercept 1.00 1104 2008
## coi_Intercept 1.00 6027 4681
## vizGradient 1.00 1677 2987
## vizContinuousViolin 1.00 1898 2872
## vizDiscreteViolin 1.00 1809 3083
## p_lt0.05No 1.00 1160 2277
## logit_p 1.00 1353 2603
## p_eq0.05No 1.00 2527 3926
## vizGradient:p_lt0.05No 1.00 1413 2671
## vizContinuousViolin:p_lt0.05No 1.00 1622 2697
## vizDiscreteViolin:p_lt0.05No 1.00 1563 2863
## vizGradient:logit_p 1.00 1502 3332
## vizContinuousViolin:logit_p 1.00 1704 3045
## vizDiscreteViolin:logit_p 1.00 1684 3526
## p_lt0.05No:logit_p 1.00 1321 2277
## vizGradient:p_eq0.05No 1.00 2849 4271
## vizContinuousViolin:p_eq0.05No 1.00 3114 4632
## vizDiscreteViolin:p_eq0.05No 1.00 3256 4926
## vizGradient:p_lt0.05No:logit_p 1.00 1634 3581
## vizContinuousViolin:p_lt0.05No:logit_p 1.00 1814 3444
## vizDiscreteViolin:p_lt0.05No:logit_p 1.00 1759 3720
## sigma_vizGradient 1.00 4641 4868
## sigma_vizContinuousViolin 1.00 4301 5810
## sigma_vizDiscreteViolin 1.00 4364 5265
## zoi_vizGradient 1.00 1695 2753
## zoi_vizContinuousViolin 1.00 1601 2737
## zoi_vizDiscreteViolin 1.00 1791 3004
## zoi_true_p0.01 1.00 1838 2803
## zoi_true_p0.04 1.00 2695 3666
## zoi_true_p0.05 1.00 2141 3525
## zoi_true_p0.06 1.00 2229 4022
## zoi_true_p0.1 1.00 2179 3757
## zoi_true_p0.5 1.00 1627 2481
## zoi_true_p0.8 1.00 1683 2906
## zoi_vizGradient:true_p0.01 1.00 1905 2746
## zoi_vizContinuousViolin:true_p0.01 1.00 1853 2819
## zoi_vizDiscreteViolin:true_p0.01 1.00 1958 2618
## zoi_vizGradient:true_p0.04 1.00 2925 3456
## zoi_vizContinuousViolin:true_p0.04 1.00 3453 4293
## zoi_vizDiscreteViolin:true_p0.04 1.00 3336 4029
## zoi_vizGradient:true_p0.05 1.00 2497 4068
## zoi_vizContinuousViolin:true_p0.05 1.00 2431 4686
## zoi_vizDiscreteViolin:true_p0.05 1.00 3047 5081
## zoi_vizGradient:true_p0.06 1.00 2800 4232
## zoi_vizContinuousViolin:true_p0.06 1.00 3432 4546
## zoi_vizDiscreteViolin:true_p0.06 1.00 3406 5464
## zoi_vizGradient:true_p0.1 1.00 2143 3293
## zoi_vizContinuousViolin:true_p0.1 1.00 2696 4434
## zoi_vizDiscreteViolin:true_p0.1 1.00 2897 4447
## zoi_vizGradient:true_p0.5 1.00 1910 3474
## zoi_vizContinuousViolin:true_p0.5 1.00 1942 3043
## zoi_vizDiscreteViolin:true_p0.5 1.00 2012 3586
## zoi_vizGradient:true_p0.8 1.00 1930 3101
## zoi_vizContinuousViolin:true_p0.8 1.00 1919 2996
## zoi_vizDiscreteViolin:true_p0.8 1.00 2087 3421
## coi_mocat_p 1.00 5952 5446
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## coi_mocat_p1[1] 0.87 0.07 0.72 0.97 1.00 8478 4650
## coi_mocat_p1[2] 0.04 0.04 0.00 0.13 1.00 8740 4119
## coi_mocat_p1[3] 0.04 0.03 0.00 0.13 1.00 9485 4539
## coi_mocat_p1[4] 0.05 0.05 0.00 0.18 1.00 7281 5264
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Now we look at some figures. First, we draw samples from the posterior predictive distribution and see how well our simulated replications match the data:
pp_check(fit_exp2, type = "hist", nsamples = 11)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
pp_check(fit_exp2, type = "stat_grouped", group = "true_p", stat = "median")
## Using all posterior samples for ppc type 'stat_grouped' by default.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
pp_check(fit_exp2, type = "stat_grouped", group = "viz", stat = "mean")
## Using all posterior samples for ppc type 'stat_grouped' by default.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Note the difference compared to the first experiment: there are far fewer extremely confident answers than in the first experiment, where the extreme answers were fairly evenly split between zero and full confidence. These posterior predictive samples are nicely in line with the data (it is plausible that the data could have been generated by this model).
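To quantify this observation (a sketch, not part of the original checks; prop_extreme is a helper defined here), we could also run a posterior predictive check on the proportion of all-or-none answers:
prop_extreme <- function(x) mean(x %in% c(0, 1))
pp_check(fit_exp2, type = "stat_grouped", group = "viz", stat = "prop_extreme")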
Now we are ready to analyze the results. First, the posterior curves of the confidence given the underlying p-value:
comb_exp2 <- fit_exp2$data %>%
data_grid(viz, logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %>%
filter(interaction(logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %in%
unique(interaction(
fit_exp2$data$logit_p, fit_exp2$data$p_lt0.05,
fit_exp2$data$p_eq0.05, fit_exp2$data$cat_p,
fit_exp2$data$true_p)))
f_mu_exp2 <- posterior_epred(fit_exp2, newdata = comb_exp2, re_formula = NA)
d <- data.frame(value = c(f_mu_exp2),
p = rep(comb_exp2$true_p, each = nrow(f_mu_exp2)),
viz = rep(comb_exp2$viz, each = nrow(f_mu_exp2)),
iter = 1:nrow(f_mu_exp2))
levels(d$viz) <- c("Classic CI", "Gradient CI", "Cont. violin CI", "Disc. violin CI")
cols <- c("Classic CI" = "#0072B2",
"Gradient CI" = "#009E73", "Cont. violin CI" = "#CC79A7",
"Disc. violin CI" = "#E69F00")
sumr <- d %>% group_by(viz, p) %>%
summarise(Estimate = mean(value),
Q2.5 = quantile(value, 0.025),
Q97.5 = quantile(value, 0.975)) %>%
mutate(p = as.numeric(levels(p))[p])
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
x_ticks <- c(0.001, 0.01, 0.04, 0.06, 0.1, 0.5, 0.8)
y_ticks <- c(0.05, seq(0.1, 0.9, by = 0.1), 0.95)
dodge <- 0.19
p1 <- sumr %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14,
margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14,
margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p2 <- sumr %>% filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p <- p1 + coord_cartesian(xlim = c(0.001, 0.9), ylim = c(0.045, 0.95)) +
annotation_custom(
ggplotGrob(p2),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p
And the probability of an extreme answer:
f_zoi_exp2 <- fitted(fit_exp2, newdata = comb_exp2, re_formula = NA, dpar = "zoi")
df_01_exp2 <- data.frame(
p = plogis(comb_exp2$logit_p),
viz = comb_exp2$viz,
f_zoi_exp2)
levels(df_01_exp2$viz) <- levels(d$viz)
y_ticks <- c(0.001, 0.01, seq(0.1,0.9,by=0.2))
p <- df_01_exp2 %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_linerange(aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(width=0.19)) +
geom_line(alpha=0.5, position = position_dodge(width=0.19)) +
ylab("Probability of all-or-none answer") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
theme_classic() +
scale_y_continuous(trans = "logit",
breaks = y_ticks, labels = y_ticks, minor_breaks = NULL) +
scale_x_continuous(trans = "logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10), legend.position = "bottom",
axis.title.x = element_text(size = 12),
axis.text.y = element_text(size = 10), axis.title.y = element_text(size = 12),
legend.text=element_text(size = 10), strip.text.x = element_text(size = 10))
p
Again we can compute the average drop in perceived confidence when moving from p = 0.04 to p = 0.06:
d %>% group_by(viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
summarise(mean = mean(difference), sd = sd(difference),
"2.5%" = quantile(difference, 0.025),
"97.5" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 5
## viz mean sd `2.5%` `97.5`
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Classic CI 0.107 0.0336 0.0403 0.174
## 2 Gradient CI 0.0389 0.0305 -0.0209 0.100
## 3 Cont. violin CI -0.0199 0.0342 -0.0860 0.0475
## 4 Disc. violin CI 0.0698 0.0358 0.000260 0.141
There is a peculiar rise in confidence for the continuous violin CI when the underlying p-value is 0.05, but overall, compared to the first experiment, the results here do not show strong differences in the cliff effect or dichotomous thinking; in fact, there are no clear signs of these phenomena in this experiment:
p <- d %>% group_by(viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
ggplot(aes(x = difference, fill = viz, colour = viz)) +
geom_density(bw = 0.01, alpha = 0.6) +
theme_classic() +
scale_fill_manual("Representation", values = cols) +
scale_colour_manual("Representation", values = cols) +
ylab("Posterior density") +
xlab("E[confidence(p=0.04) - confidence(p=0.06)]") +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.text = element_text(size = 14))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
p
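We can also compute the posterior probabilities that the cliff effect is larger for one representation than for another: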
postprob <- d %>% group_by(viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
group_by(iter) %>%
mutate(ci_vs_gradient = difference[viz == "Classic CI"] -
difference[viz == "Gradient CI"],
ci_vs_cviolin = difference[viz == "Classic CI"] -
difference[viz == "Cont. violin CI"],
ci_vs_dviolin = difference[viz == "Classic CI"] -
difference[viz == "Disc. violin CI"],
gradient_vs_cviolin = difference[viz == "Gradient CI"] -
difference[viz == "Cont. violin CI"],
gradient_vs_dviolin = difference[viz == "Gradient CI"] -
difference[viz == "Disc. violin CI"],
cviolin_vs_dviolin = difference[viz == "Cont. violin CI"] -
difference[viz == "Disc. violin CI"]) %>%
ungroup() %>% summarise(
"P(CI > gradient)" = mean(ci_vs_gradient > 0),
"P(CI > cviolin)" = mean(ci_vs_cviolin > 0),
"P(CI > dviolin)" = mean(ci_vs_dviolin > 0),
"P(gradient > cont violin)" = mean(gradient_vs_cviolin > 0),
"P(gradient > disc violin)" = mean(gradient_vs_dviolin > 0),
"P(cont violin > disc violin)" = mean(cviolin_vs_dviolin > 0))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
round(t(as.data.frame(postprob)), 2)
## [,1]
## P(CI > gradient) 0.96
## P(CI > cviolin) 1.00
## P(CI > dviolin) 0.80
## P(gradient > cont violin) 0.93
## P(gradient > disc violin) 0.22
## P(cont violin > disc violin) 0.02
Now we consider the expanded model with expertise as a predictor:
fit_expertise <- brm(bf(
confidence ~
expertise * viz * p_lt0.05 * logit_p +
expertise * viz * p_eq0.05 +
(viz + p_lt0.05 * logit_p + p_eq0.05 | id),
zoi ~
expertise * viz + viz * true_p + (viz | id),
coi ~ mo(cat_p),
sigma ~ expertise * viz + (1 | id)),
data = data2,
family = logit_p_gaussian,
stanvars = stanvar(scode = stan_funs, block = "functions"),
chains = 4, cores = 4, iter = 2000, init = 0,
save_warmup = FALSE, save_all_pars = TRUE, refresh = 0)
comb_exp2 <- fit_expertise$data %>%
data_grid(expertise, viz, logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %>%
filter(interaction(expertise, logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %in%
unique(interaction(fit_expertise$data$expertise,
fit_expertise$data$logit_p, fit_expertise$data$p_lt0.05,
fit_expertise$data$p_eq0.05, fit_expertise$data$cat_p,
fit_expertise$data$true_p)))
f_mu_exp2 <- posterior_epred(fit_expertise, newdata = comb_exp2, re_formula = NA)
d <- data.frame(value = c(f_mu_exp2),
p = rep(comb_exp2$true_p, each = nrow(f_mu_exp2)),
viz = rep(comb_exp2$viz, each = nrow(f_mu_exp2)),
expertise = rep(comb_exp2$expertise, each = nrow(f_mu_exp2)),
iter = 1:nrow(f_mu_exp2))
levels(d$viz) <- c("Classic CI", "Gradient CI", "Cont. violin CI", "Disc. violin CI")
cols <- c("Classic CI" = "#0072B2",
"Gradient CI" = "#009E73", "Cont. violin CI" = "#CC79A7",
"Disc. violin CI" = "#E69F00")
Here are the posterior curves for the four expertise groups:
sumr <- d %>% group_by(viz, p, expertise) %>%
summarise(Estimate = mean(value),
Q2.5 = quantile(value, 0.025),
Q97.5 = quantile(value, 0.975)) %>%
mutate(p = as.numeric(levels(p))[p])
## `summarise()` regrouping output by 'viz', 'p' (override with `.groups` argument)
x_ticks <- c(0.001, 0.01, 0.04, 0.06, 0.1, 0.5, 0.8)
y_ticks <- c(0.05, seq(0.1, 0.9, by = 0.1), 0.95)
dodge <- 0.19
p11 <- sumr %>% filter(expertise == "Stats/ML") %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.margin = margin(t = -0.1, b = 0, unit = "cm"),
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14,
margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14,
margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07),
ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p21 <- sumr %>% filter(expertise == "Stats/ML") %>%
filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
yrange <- c(min(sumr$Q2.5)-0.001, max(sumr$Q97.5) +0.001)
p1 <- p11 + coord_cartesian(xlim = c(0.001, 0.9),
ylim = yrange) +
annotation_custom(
ggplotGrob(p21),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p12 <- sumr %>% filter(expertise == "VIS/HCI") %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks, minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14, margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14, margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p22 <- sumr %>% filter(expertise == "VIS/HCI") %>%
filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p2 <- p12 + coord_cartesian(xlim = c(0.001, 0.9), ylim = yrange) +
annotation_custom(
ggplotGrob(p22),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p13 <- sumr %>% filter(expertise == "Social science and humanities") %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks, minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14, margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14, margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p23 <- sumr %>% filter(expertise == "Social science and humanities") %>%
filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p3 <- p13 + coord_cartesian(xlim = c(0.001, 0.9), ylim = yrange) +
annotation_custom(
ggplotGrob(p23),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p14 <- sumr %>% filter(expertise == "Physical and life sciences") %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks, minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14, margin = margin(t = -0.1, r = 0, b = -0.3, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14, margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p24 <- sumr %>% filter(expertise == "Physical and life sciences") %>%
filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p4 <- p14 + coord_cartesian(xlim = c(0.001, 0.9), ylim = yrange) +
annotation_custom(
ggplotGrob(p24),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
library(patchwork)
p <- (p1 + ggtitle("Stats/ML")) + (p2 + ggtitle("VIS/HCI")) +
(p3 + ggtitle("Social sciences and humanities")) +
(p4 + ggtitle("Physical and life sciences"))
p
And the expertise-specific cliff effects:
d %>% group_by(expertise, viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
summarise(mean = mean(difference), sd = sd(difference),
"2.5%" = quantile(difference, 0.025),
"97.5" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'expertise', 'viz' (override with `.groups` argument)
## `summarise()` regrouping output by 'expertise' (override with `.groups` argument)
## # A tibble: 16 x 6
## # Groups: expertise [4]
## expertise viz mean sd `2.5%` `97.5%`
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Stats/ML Classic CI 0.114 0.0451 0.0265 0.204
## 2 Stats/ML Gradient CI 0.0348 0.0470 -0.0593 0.127
## 3 Stats/ML Cont. violin CI -0.00819 0.0605 -0.128 0.111
## 4 Stats/ML Disc. violin CI 0.0244 0.0566 -0.0863 0.136
## 5 VIS/HCI Classic CI 0.0697 0.0809 -0.0869 0.231
## 6 VIS/HCI Gradient CI 0.00564 0.102 -0.165 0.229
## 7 VIS/HCI Cont. violin CI -0.00516 0.0967 -0.191 0.187
## 8 VIS/HCI Disc. violin CI 0.116 0.0943 -0.0681 0.303
## 9 Social science and humanities Classic CI 0.116 0.0610 0.00240 0.237
## 10 Social science and humanities Gradient CI 0.0253 0.0512 -0.0703 0.127
## 11 Social science and humanities Cont. violin CI -0.0321 0.0585 -0.149 0.0822
## 12 Social science and humanities Disc. violin CI 0.0810 0.0649 -0.0473 0.209
## 13 Physical and life sciences Classic CI 0.168 0.124 -0.0696 0.430
## 14 Physical and life sciences Gradient CI 0.155 0.122 -0.0396 0.458
## 15 Physical and life sciences Cont. violin CI -0.0253 0.0957 -0.189 0.180
## 16 Physical and life sciences Disc. violin CI 0.113 0.0902 -0.0375 0.308
As with the model without expertise, we see no clear signs of a cliff effect here.
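As an extra check (not part of the original analysis), the posterior probability of a drop in confidence from p = 0.04 to p = 0.06 can be computed directly from the same posterior draws:
# posterior probability that confidence at p = 0.04 exceeds confidence at p = 0.06
d %>% group_by(expertise, viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"], .groups = "drop_last") %>%
summarise(prob_drop = mean(difference > 0), .groups = "drop")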
Finally, we again test how the results depend on those confidence curves which have a clearly positive slope or large jumps upwards in confidence as the p-value increases.
We fit a simple linear model with the logit-transformed p-value as predictor and the trimmed, logit-transformed confidence as response, and flag curves whose slope coefficient is clearly positive (the threshold of 0.1 is arbitrary); in addition, we remove curves where the difference between two consecutive confidence values is larger than 0.2 (these differences should of course be negative):
data <- readRDS("experiment2/data/exp2_data.rds")
# flag id-viz curves whose logit-confidence clearly increases with the logit p-value
outliers_slope <- data %>% group_by(id, viz) %>%
mutate(p_logit = qlogis(p), c_logit = qlogis(
ifelse(confidence == 0, 0.001, ifelse(confidence == 1, 0.999, confidence)))) %>%
summarize(
slope = coef(lm(c_logit ~ p_logit))[2]) %>%
filter(slope > 0.1)
# flag id-viz curves where confidence jumps up by more than 0.2 between consecutive p-values
outliers_diff <- data %>% group_by(id, viz) %>%
arrange(p, .by_group = TRUE) %>%
mutate(diff = c(0, diff(confidence)), neg_diff = any(diff > 0.2)) %>%
filter(neg_diff)
data_cleaned <- data %>%
filter(!(interaction(id,viz) %in%
interaction(outliers_slope$id, outliers_slope$viz))) %>%
filter(!(interaction(id,viz) %in%
interaction(outliers_diff$id, outliers_diff$viz)))
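As a quick sanity check (this step is not in the original script), we can count how many id-and-viz-specific curves each criterion flagged and how many observations remain after cleaning:
# number of curves flagged by the slope criterion
nrow(outliers_slope)
# number of curves flagged by the jump criterion
nrow(distinct(outliers_diff, id, viz))
# observations before and after removal
c(original = nrow(data), cleaned = nrow(data_cleaned))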
data_cleaned <- data_cleaned %>%
mutate(
logit_p = qlogis(p),
p_lt0.05 = factor(p < 0.05, levels = c(TRUE, FALSE), labels = c("Yes", "No")),
p_eq0.05 = factor(p == 0.05, levels = c(TRUE, FALSE), labels = c("Yes", "No")),
cat_p = recode_factor(true_p,
"0.06" = ">0.05", "0.1" = ">0.05", "0.5" = ">0.05", "0.8" = ">0.05",
.ordered = TRUE))
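To verify that the collapsed p-value factor behaves as intended (an added check, not in the original script), we can cross-tabulate it against the original p-values; everything above 0.05 should fall into the ">0.05" category:
# cross-tabulation of the original and collapsed p-value factors
table(data_cleaned$true_p, data_cleaned$cat_p)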
fit_exp2 <- brm(bf(
confidence ~
viz * p_lt0.05 * logit_p +
viz * p_eq0.05 +
(viz + p_lt0.05 * logit_p + p_eq0.05 | id),
zoi ~
viz * true_p + (viz | id),
coi ~ mo(cat_p),
sigma ~ viz + (1 | id)),
data = data_cleaned,
family = logit_p_gaussian,
stanvars = stanvar(scode = stan_funs, block = "functions"),
chains = 4, cores = 4, iter = 2000, init = 0,
save_warmup = FALSE, refresh = 0)
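Before using the fit, a minimal convergence check (not part of the original script) is to look at the largest split-Rhat over all parameters, which should be close to 1:
# largest split-Rhat of the refitted model
max(rhat(fit_exp2), na.rm = TRUE)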
comb_exp2 <- fit_exp2$data %>%
data_grid(viz, logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %>%
filter(interaction(logit_p, p_lt0.05, p_eq0.05, cat_p, true_p) %in%
unique(interaction(
fit_exp2$data$logit_p, fit_exp2$data$p_lt0.05,
fit_exp2$data$p_eq0.05, fit_exp2$data$cat_p,
fit_exp2$data$true_p)))
f_mu_exp2 <- posterior_epred(fit_exp2, newdata = comb_exp2, re_formula = NA)
d <- data.frame(value = c(f_mu_exp2),
p = rep(comb_exp2$true_p, each = nrow(f_mu_exp2)),
viz = rep(comb_exp2$viz, each = nrow(f_mu_exp2)),
iter = 1:nrow(f_mu_exp2))
levels(d$viz) <- c("Classic CI", "Gradient CI", "Cont. violin CI", "Disc. violin CI")
sumr <- d %>% group_by(viz, p) %>%
summarise(Estimate = mean(value),
Q2.5 = quantile(value, 0.025),
Q97.5 = quantile(value, 0.975)) %>%
mutate(p = as.numeric(levels(p))[p])
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
cols <- c("Classic CI" = "#0072B2",
"Gradient CI" = "#009E73", "Cont. violin CI" = "#CC79A7",
"Disc. violin CI" = "#E69F00")
x_ticks <- c(0.001, 0.01, 0.04, 0.06, 0.1, 0.5, 0.8)
y_ticks <- c(0.05, seq(0.1, 0.9, by = 0.1), 0.95)
dodge <- 0.19
p1 <- sumr %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(dodge), size = 0.1) +
geom_linerange(data = sumr %>% filter(p < 0.03 | p > 0.07),
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(dodge), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(dodge), size = 0.7, show.legend = FALSE) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = x_ticks, labels = x_ticks, minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14,
margin = margin(t = -0.1, r = 0, b = -0.1, l = 0, unit = "cm")),
axis.title.y = element_text(size = 14,
margin = margin(t = 0, r = -0.1, b = 0, l = -0.1, unit = "cm")),
legend.text = element_text(size = 14)) +
geom_rect(xmin = qlogis(0.03), xmax = qlogis(0.07), ymin = qlogis(0.31), ymax = qlogis(0.82),
color = "grey70", alpha = 0, linetype = "dashed", size = 0.1) +
guides(colour = guide_legend(override.aes = list(size = 1.5)))
p2 <- sumr %>% filter(p > 0.02 & p < 0.09) %>%
ggplot(aes(x = p, y = Estimate, colour = viz)) +
geom_line(position = position_dodge(0.1), size = 0.1) +
geom_linerange(
aes(ymin = Q2.5, ymax = Q97.5),
position = position_dodge(0.1), size = 0.3,
show.legend = FALSE) +
geom_point(position = position_dodge(0.1), size = 0.7) +
ylab("Confidence") + xlab("p-value") +
scale_color_manual("Representation", values = cols) +
scale_y_continuous(trans="logit", breaks = y_ticks,
minor_breaks = NULL, labels = y_ticks) +
scale_x_continuous(trans="logit",
breaks = c(0.04, 0.05, 0.06),
labels = c(0.04, 0.05, 0.06),
minor_breaks = NULL) +
theme_classic() +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.title.y = element_blank(),
plot.background = element_blank(),
plot.margin=unit(c(-4,-9,0,0), "mm"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12))
p <- p1 + coord_cartesian(xlim = c(0.001, 0.9), ylim = c(0.045, 0.95)) +
annotation_custom(
ggplotGrob(p2),
xmin = qlogis(0.2), xmax = qlogis(0.9), ymin = qlogis(0.3), ymax = qlogis(0.95))
p
d %>% group_by(viz, iter) %>%
summarise(difference = value[p == "0.04"] - value[p == "0.06"]) %>%
summarise(mean = mean(difference), sd = sd(difference),
"2.5%" = quantile(difference, 0.025),
"97.5" = quantile(difference, 0.975))
## `summarise()` regrouping output by 'viz' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 5
## viz mean sd `2.5%` `97.5%`
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Classic CI 0.125 0.0349 0.0571 0.194
## 2 Gradient CI 0.0524 0.0316 -0.00828 0.114
## 3 Cont. violin CI -0.0152 0.0345 -0.0820 0.0526
## 4 Disc. violin CI 0.0693 0.0372 -0.00311 0.143
Overall, the results are very similar to the analysis with the full data.
Next, we read the subjective ranking data of the second experiment:
path <- "experiment2/data"
files <- list.files(path, pattern = "subjective", full.names = TRUE)
n <- length(files)
rankdata2 <- data.frame(id = rep(1:n, each = 4),
viz = factor(rep(c("violin2", "ci", "violin", "gradient")),
levels = c("violin2", "ci", "violin", "gradient")),
rank = factor(NA, levels = 1:4))
for(i in 1:n) {
fb <- fromJSON(files[i])
rankdata2$id[4*(i-1) + 1:4] <- strsplit(strsplit(files[i], "subjective")[[1]], ".txt")[[2]]
rankdata2$rank[4*(i-1) + 1:4] <- factor(fb$rank)
}
rankdata2$viz <- recode_factor(rankdata2$viz, "ci" = "CI",
"gradient" = "Gradient",
"violin" = "Continuous Violin",
"violin2" = "Discrete Violin")
rankdata2$viz <- relevel(rankdata2$viz, "CI")
rankdata2$rank <- factor(rankdata2$rank, ordered = TRUE)
rankdata2$id <- factor(rankdata2$id, levels = levels(data2$id))
ranks_exp2 <- distinct(inner_join(rankdata2, data2[, c("id", "viz", "expertise")]))
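Before modelling, a simple frequency table of the raw ranks per representation (an added check, not in the original script) gives a first impression of the preferences:
# counts of each rank (1 = best, 4 = worst) per representation
with(ranks_exp2, table(viz, rank))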
And fit the same model as in the first experiment:
fit_rank2 <- brm(rank ~ viz + (1 | id), family = cumulative,
data = ranks_exp2, refresh = 0)
saveRDS(fit_rank2, file = "experiment2/results/fit_rank2.rds")
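A quick numeric summary of the population-level effects on the cumulative logit scale (added here for convenience, not in the original script):
# posterior means and 95% intervals of the representation effects
fixef(fit_rank2)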
The ranking probabilities:
effects_exp2 <- conditional_effects(fit_rank2, effects = "viz",
plot = FALSE, categorical = TRUE,
re_formula = NA)
p <- ggplot(effects_exp2[[1]], aes(x = viz, y = estimate__, colour = cats__)) +
geom_point(position=position_dodge(0.5)) +
geom_errorbar(width=0.25, aes(ymin=lower__, ymax = upper__),
position = position_dodge(0.5)) +
theme_classic() +
ylab("Ranking \n probability") + xlab("Representation") +
scale_x_discrete(labels =
c("Classic CI", "Gradient CI", "Cont. violin CI", "Disc. violin CI")) +
scale_color_manual("Rank",
values = colsrank,
labels = c("1 (best)", "2", "3", "4 (worst)")) +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(size = 12,hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.text = element_text(size = 14))
p
Preferences between the different techniques seem to be quite similar, except that there seems to be a preference towards the discrete violin CI plot.
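To quantify this, a small sketch (not part of the original analysis) computes the posterior mean probability of being ranked first for each representation, using only the population-level effects:
# expected rank probabilities per representation, ignoring participant effects
newd <- data.frame(viz = levels(ranks_exp2$viz))
pp <- posterior_epred(fit_rank2, newdata = newd, re_formula = NA)
# pp is a draws x representation x rank-category array; pick rank 1 (best)
best <- colMeans(pp[, , 1])
names(best) <- levels(ranks_exp2$viz)
best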