-
Notifications
You must be signed in to change notification settings - Fork 1
/
2022_week-33_open-psychometrics.R
154 lines (130 loc) · 6.67 KB
/
2022_week-33_open-psychometrics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# #TidyTuesday Week 33: Open Psychometrics by Tanya Shapiro
# Mostrar en una serie de datavis al estilo Cedric Scherer,
# en la visualizacion de los pinguinos, es decir,
# distribuciones y un scatterplot.
# Para el caso de las distribuciones usare todos los atributos.
# Sin embargo, para el scatter plot, reducire a 2 dimensiones todo el
# conjunto de datos con PCA. Seguidamente, mostrare todos los puntos pero
# solo resaltare los personajes de mis shows favoritos:
#
# Avatar: The Last Airbender
# Gossip Girl
# Hamilton
# Stranger Things
# The Umbrella Academy
library(dplyr)
library(ggplot2)
library(gghighlight)
library(ggrepel)
library(MetBrewer)
library(ggtext)
library(sysfonts)
library(showtext)
library(factoextra)
# ------ LOAD DATA ------
psychometrics_characters <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-08-16/psych_stats.csv")
# ------ SELECT PERSONALITY TRAITS------
vec_personalities <- c("cocky","independent","genius","eloquent","driven","chatty","adventurous",
"active","decisive","crazy")
# ------ FIND QUESTIONS FOR THE PERSONALITY TRAITS------
vec_questions <- psychometrics_characters %>%
filter(personality %in% vec_personalities) %>%
pull(question) %>% unique()
relevant_dataset <- psychometrics_characters %>%
filter(question %in% vec_questions) %>%
select(char_id, char_name, uni_id, uni_name, question, personality, avg_rating)
# ------ DATA WRANGLING------
pca_dataset <- relevant_dataset %>%
mutate(main_score = case_when(personality %in% vec_personalities ~ avg_rating,
T ~ 100-avg_rating),
main_personality = case_when(personality == 'timid' ~ 'cocky',
personality == 'codependent' ~ 'independent',
personality == 'dunce' ~ 'genius',
personality == 'unpolished' ~ 'eloquent',
personality == 'unambitious' ~ 'driven',
personality == 'reserved' ~ 'chatty',
personality == 'stick-in-the-mud' ~ 'adventurous',
personality == 'slothful' ~ 'active',
personality == 'hesitant' ~ 'decisive',
personality == 'sane' ~ 'crazy',
T ~ personality
)) %>%
select(char_id, char_name, uni_id, uni_name, main_personality, main_score) %>%
tidyr::pivot_wider(names_from = main_personality, values_from = main_score)
# ------ PCA------
numeric_data <- pca_dataset %>% select_if(is.numeric)
pca_results <- prcomp(numeric_data)
fviz_eig(pca_results)
# Keep the firts 2 PCA's
df <- pca_dataset %>%
bind_cols(pca_results$x %>%
as_tibble() %>%
select(PC1,PC2))
# ------ DATA VISUALIZATION------
font_add_google("Comfortaa", "titleFont")
font_add_google("Josefin Sans", "bodyFont")
showtext_auto()
title_font <- "titleFont"
body_font <- "bodyFont"
fav_shows <- c('Avatar: The Last Airbender', 'Gossip Girl', 'Hamilton', 'Stranger Things', 'The Umbrella Academy')
title_text <- "The Universe of Personalities"
subtitle_text <- "A fictional character does not have a real personality, but people might perceive it to have one. Thanks to this dataset, we can view the personality spectrum of the characters from more than 50 TV Shows or movies.<br>I selected ten personality traits (cocky, independent, genius, eloquent, driven, chatty, adventurous, active, decisive and crazy). Then, in order to see them in a 2-dimensional plane, I performed a PCA on them.<br>The highlighted points are characters from my favourite TV shows: <span style='color:#CF3A36'><b>Stranger Things</b></span>, <span style='color:#D6705C'><b>Gossip Girl</b></span>, <span style='color:#0C7156'><b>Avatar: The Last Airbender</b></span>, <span style='color:#EA7428'><b>Hamilton</b></span> and <span style='color:#663171'><b>The Umbrella Academy</b></span>."
caption_text <- "Designed by Isaac Arroyo (@unisaacarroyov on Twitter) <br>#TidyTuesday Week 33: Open Psychometrics by Tanya Shapiro (@tanya_shapiro on Twitter)"
p1 <- df %>%
ggplot(aes(x=PC1, y= PC2, colour= uni_name)) +
geom_point(size = 3) +
gghighlight(uni_name %in% fav_shows,
max_highlight = 50,
unhighlighted_params = list(color='gray50', alpha = 0.3, size = 2)
) +
geom_label_repel(aes(label=char_name), max.overlaps = 50, seed = 11,
force = 3,
arrow = grid::arrow(length = unit(5,'pt'), ends = 'last'),
point.padding = unit(10,"pt"),
nudge_x = -5,
nudge_y = 5,
label.size = 0.5,
size = 10,
family = body_font,
) +
scale_colour_manual(values = met.brewer("Java", n=5, direction = -1)) +
labs(title= title_text,
subtitle = subtitle_text,
caption = caption_text) +
theme_void() +
theme(
# Background
plot.background = element_rect(fill = '#F6E3BD', colour = '#F6E3BD'),
# Title
plot.title.position = "plot",
plot.title = element_textbox(family = title_font,
face = 'bold',
size = rel(9),
lineheight = 0.2,
padding = margin(0,0,0,0),
margin = margin(1/3,1/3,0,1/3, unit = "in")
),
# Subtitle
plot.subtitle = element_textbox(family = body_font,
size = rel(3.5),
lineheight = 0.2,
halign = 0, hjust = 0,
width = unit(8,'in'),
padding = margin(0,0,0,0),
margin = margin(0.125,1/3,0.125,1/3, unit = "in")
),
# Caption
plot.caption.position = "plot",
plot.caption = element_textbox(halign = 0, hjust = 0,
family = body_font,
face = 'bold',
size = rel(2.7),
lineheight = 0.5,
padding = margin(0,0,0,0),
margin = margin(0.125,1/3,1/3,1/3, unit = "in")
)
)
ggsave(filename = "./gallery_2022/2022_week-33_open-psychometrics.png",
plot = p1,
width = 8.5, height = 11, units = "in",
dpi = 300)