Look at evidence for formal training in CS/programming among those who use R.
Use the 2018 Stack Overflow Annual Developer Survey: https://insights.stackoverflow.com/survey
Original code from Julia Silge, data scientist at Stack Overflow. Modified by Jenny Bryan.
Load packages.
Make sure we have the data.
## Location of the survey CSV, built with here().
## NOTE(review): the arguments to here() are not visible in this view -- confirm
## the path components before running.
survey_path <- here(
## Only download and unpack the survey archive when the CSV is not already on disk.
if (!file.exists(survey_path)) {
## download_zip() is an unexported usethis helper (hence `:::`); it
## consults Content-Description to get filename
dl <- usethis:::download_zip(
url = "https://drive.google.com/uc?export=download&id=1_9On2-nsBQIw3JiY43sWbrF8EjrqrR4U",
destdir = here("stackoverflow-survey")
## Directory the archive is extracted into.
## NOTE(review): the here() arguments for `target` are also missing here -- confirm.
target <- here(
## Unpack the downloaded archive `dl` into `target`.
utils::unzip(dl, exdir = target)
Load the data.
## Make a minimal theme set in Source Sans Pro the default for every plot below.
theme_set(
  theme_minimal(base_family = "Source Sans Pro")
)

## Read the survey responses from the CSV located at survey_path.
survey2018 <- read_csv(file = survey_path)
What kinds of majors do R users have?
## One row per respondent who reported an undergrad major: whether "R" is among
## the languages they listed (UsesR), plus that major.
users_majors <- survey2018 %>%
  filter(!is.na(UndergradMajor)) %>%
  select(Respondent, LanguageWorkedWith, UndergradMajor) %>%
  ## LanguageWorkedWith is split on ";" and unnested so each language gets its
  ## own row before collapsing back to one row per respondent.
  mutate(
    LanguageWorkedWith = str_split(string = LanguageWorkedWith, pattern = ";")
  ) %>%
  unnest(LanguageWorkedWith) %>%
  group_by(Respondent) %>%
  summarise(
    UsesR = is.element("R", LanguageWorkedWith),
    UndergradMajor = first(UndergradMajor)
  )
## Tabulate respondents per undergrad major, with one count column for R users
## ("useR") and one for everyone else ("Other"); absent combinations become 0.
counts_major <- users_majors %>%
  count(UsesR, UndergradMajor) %>%
  mutate(
    UsesR = if_else(condition = UsesR, true = "useR", false = "Other")
  ) %>%
  spread(key = UsesR, value = n, fill = 0)
## Turn the per-major counts into add-one-smoothed shares within each numeric
## column, then compare the useR share with the Other share on a log2 scale.
## A positive logratio means the major is relatively more common among R users.
logratio_major <- counts_major %>%
  ## Plain anonymous function rather than the deprecated funs() wrapper.
  mutate_if(is.numeric, function(x) (x + 1) / sum(x + 1)) %>%
  mutate(logratio = log2(useR / Other)) %>%
  arrange(desc(logratio)) %>%
  ## The factor tweaks below must live inside a mutate() call; they order the
  ## majors by logratio for plotting and label which group each major favours.
  mutate(
    UndergradMajor = reorder(UndergradMajor, logratio),
    Direction = factor(if_else(logratio > 0, "useRs", "Other")),
    Direction = forcats::fct_reorder(Direction, logratio, .desc = TRUE)
  )
UndergradMajor | Other | useR |
A business discipline (ex. accounting, finance, marketing) | 1750 | 171 |
A health science (ex. nursing, pharmacy, radiology) | 217 | 29 |
A humanities discipline (ex. literature, history, philosophy) | 1487 | 103 |
A natural science (ex. biology, chemistry, physics) | 2561 | 489 |
A social science (ex. anthropology, psychology, political science) | 1122 | 255 |
Another engineering discipline (ex. civil, electrical, mechanical) | 6575 | 370 |
Computer science, computer engineering, or software engineering | 48340 | 1996 |
Fine arts or performing arts (ex. graphic design, music, studio art) | 1105 | 30 |
I never declared a major | 677 | 16 |
Information systems, information technology, or system administration | 6307 | 200 |
Mathematics or statistics | 2236 | 582 |
Web development or web design | 2397 | 21 |
UndergradMajor | Other | useR | logratio | Direction |
Mathematics or statistics | 0.0299120 | 0.1364062 | 2.1891119 | useRs |
A social science (ex. anthropology, psychology, political science) | 0.0150162 | 0.0598971 | 1.9959672 | useRs |
A natural science (ex. biology, chemistry, physics) | 0.0342577 | 0.1146467 | 1.7426926 | useRs |
A health science (ex. nursing, pharmacy, radiology) | 0.0029150 | 0.0070192 | 1.2678157 | useRs |
A business discipline (ex. accounting, finance, marketing) | 0.0234135 | 0.0402433 | 0.7814108 | useRs |
A humanities discipline (ex. literature, history, philosophy) | 0.0198968 | 0.0243332 | 0.2903903 | useRs |
Another engineering discipline (ex. civil, electrical, mechanical) | 0.0879309 | 0.0868039 | -0.0186098 | Other |
Computer science, computer engineering, or software engineering | 0.6463910 | 0.4672438 | -0.4682317 | Other |
Information systems, information technology, or system administration | 0.0843473 | 0.0470285 | -0.8428058 | Other |
Fine arts or performing arts (ex. graphic design, music, studio art) | 0.0147889 | 0.0072532 | -1.0278300 | Other |
I never declared a major | 0.0090659 | 0.0039775 | -1.1885692 | Other |
Web development or web design | 0.0320648 | 0.0051474 | -2.6390749 | Other |
## Base plot shared by both variants below: horizontal bars of the log2 ratio
## per undergrad major, coloured by which group the major favours. The axis
## breaks at -2..2 are relabelled as multiplicative factors.
p <- logratio_major %>%
  group_by(Direction) %>%
  ggplot(
    mapping = aes(x = UndergradMajor, y = logratio, fill = Direction)
  ) +
  geom_col(alpha = 0.9) +
  coord_flip() +
  scale_y_continuous(
    breaks = seq(-2, 2),
    labels = c("0.25x", "0.5x", "Same", "2x", "4x")
  )
## Julia's original labelling of the base plot
p +
  labs(
    title = "What kinds of undergrad majors do R users have?",
    subtitle = "R users are less likely to have formal programming training",
    x = NULL,
    y = "Relatively more from R users",
    fill = "More likely from..."
  )
## For use in Keynote: same base plot with slide-friendly labels, a credit
## caption, the legend moved to the top, and enlarged legend title / major labels.
p +
  labs(
    y = "Relative prevalence", x = NULL,
    fill = "Major is more common among",
    caption = "Julia Silge & Jenny Bryan\nSource: 2018 Stack Overflow Annual Developer Survey"
  ) +
  ## The legend/axis settings are theme elements, so they must be wrapped in theme().
  theme(
    legend.position = "top",
    legend.title = element_text(size = rel(1.4)),
    axis.text.y = element_text(size = rel(1.3))
  )