forked from rafalab/dsbook
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchapter_to_slides.R
415 lines (373 loc) · 16.8 KB
/
chapter_to_slides.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
#' chapter to slides
#'
#' Convert a bookdown chapter Rmd file to Rmd slides
#'
#' The function converts Rmd chapters from the Intro to Data Science book into Rmd ready
#' to be compiled in slides. It is somewhat hard-wired to the style used in this book.
#' It turns every sentence into an entry in a bullet point with a new page automatically started
#' after predetermined number o lines or characters is reached. If a section has titled exercise the section is
#' saved in a separate file and omitted from slides. The R chunks that do not include plotting functions
#' are preserved. Chunks that plot and do not include either eval=FALSE or echo=FALSE are copied twice
#' so that code shows in one page and the plot in another.
#'
#' Note that the yml header is hard wired. If you want to change, edit the line where `start` is defined.
#'
#' @details The output is should be checked before kniting as the output is rarely perfect. In particular,
#' think of the R output that will be generated by the R chunks as these are not seen and therefore not
#' counted when deciding when to start a new page.
#'
#' @author Rafael A. Irizarry
#'
#' @param input Input Rmd file to be converted.
#' @param output Name of output file without extension. Defaults to same as input.
#' @param output.dir Directory in which to save output file. Defaults to working directory.
#' @param output.exercise Name of output file for exercises. Defaults to -exercise appended to output.
#' @param suffix File extension. Defaults to Rmd
#' @param img.dir Directory to look for images in. Defaults to ./img
#' @param title Title for slides. Defaults to output with dash replaced by spaces and title case.
#' @param author Author for the slides
#' @param max.lines Number of lines per slide
#' @param chars.per.line Number of characters that define a line.
#' @param max.section.title.length If the section number is bigger than this it gets cut-off. Defaults to infinity.
#' @param save,exercises If true saves a separate file with exercises.
#' @param verbose If TRUE show information about line being processed.
#'
#' @return A data frame with counts for each group for each date with population sizes, if demo was provided.
#'
#' @examples
#' \dontrun{
#' rmd_to_slides("dsbood/inference/models.Rmd", "lectures/inference/models")
#' }
#' @export
#' @import stringr
chapter_to_slides <- function(input,
output = NULL,
output.dir = getwd(),
output.exercises = NULL,
suffix = "Rmd",
img.dir = "img",
title = NULL,
author = "Rafael A. Irizarry",
max.lines = 15,
chars.per.line = 60,
max.section.title.length = NULL,
save.exercises = TRUE,
verbose=FALSE){
## Version: 0.0.1
## License: Artistic-2.0
## Author: Rafael A. Irizarry
library(stringr)
if(is.null(output)){
output <- basename(input)
output <- gsub("\\.[a-z|A-Z|0-9]+$", "", output)
}
output <- file.path(output.dir, output)
file_name <- str_c(output, ".", suffix)
if(file.exists(file_name)) stop(file_name, " file exists. Pick a different filename or remove the file.")
start_section <- function(start.lines = 0, env = parent.frame()){
cat("\n\n", env$the_section, "\n\n", sep = "",
file = env$file_name, append = TRUE)
env$lines <- start.lines
env$page <- env$page + 1
}
## define filename for extracted exercises if not provided
if(is.null(output.exercises)){
output.exercises <- paste0(output, "-exercises")
}
## add suffix to filenames
exercise_file_name <- str_c(output.exercises, ".", suffix)
## define title if not provided
if(is.null(title)){
title <- str_replace_all(basename(output), "-", " ") |> str_to_title()
}
if(is.null(max.section.title.length)) max.section.title.length <- -1L
## read-in input line by line
x <- readLines(input)
## Remove empty lines
x <- x[!str_trim(x)==""]
## Remove comments
x <- x[!str_detect(x, "<!--")]
x <- x[!str_detect(x, "img_path\\s+<-")]
## The following code is only needed for files
## that check for knitr format to make tables
## this is very specific to files from the book
table_index <- str_which(x, "if\\(knitr::is_html_output\\(\\)\\)\\{")
if(length(table_index)>0){
out <- c()
for(i in table_index){
tab_name <- str_match(x[i+1], "kable\\(([a-z|A-Z|0-9|_].*),.*[latex|html].*")[1,2] ## grab name of table object
x[i] <- str_c(" knitr::kable(", tab_name, ")")
out <- c(out,i)
j <- i
ends <- 0
while(ends<1){
j <- j+1
out <- c(out,j)
if(str_detect(x[j], "\\}")) ends <- ends + 1
if(str_detect(x[j], "\\{")) ends <- ends - 1
}
}
x <- x[-out]
}
##line_type will store, for each line of x, what kind of line it is
##options are
## section header
## prose
## rchunk start inside or end
## exercise_start
## plot rchunk start inside or end
## quotes
## latex start inside or end
## table
## last line
line_type <- rep("prose", length(x))
## find section starts
section_starts <- str_which(x, "^\\#+\\s+")
## clean up section start names and cut if longer than certain size
x[section_starts] <- x[section_starts] |>
str_remove("\\{.*\\}") |>
str_replace_all("#+", "##") |>
str_trim() |>
str_sub(1L, max.section.title.length)
line_type[section_starts] <- "section"
## find exercise section starts
exercise_starts <- str_which(x, "## [Ee]xercise|## [Ee]jercicio")
line_type[exercise_starts] <- "exercise_start"
## find the latex start and ends
latex_inds <- str_which(x, "\\$\\$")
if(length(latex_inds) %% 2 > 0){
warning("Detected unclosed latex. Check output carefully.")
latex_inds <- c(latex_inds, last(latex_inds))
latex_start <-latex_inds[seq(1,length(latex_inds),2)]
latex_end <- latex_inds[seq(2,length(latex_inds),2)]
latex_size <- rep(0, length(x)) ## used to decide if start new section
latex_size[latex_start] <- pmax(1, latex_end - latex_start - 2)
## if start and end in same line make it the end
line_type[latex_start] <- "latex_start"
line_type[latex_end] <- "latex_end"
## find the insider of latex
for(i in seq_along(latex_start)){
st <- latex_start[i]
en <- latex_end[i]
if(st==en){
line_type[st] <- "latex_start_and_end" ## if onle line of latex use this
} else{
ind <- (st+1):(en-1)
if(length(ind)>0){
line_type[ind] <- "latex_inside"
}
}
}
}
## find start of tables
line_type[str_detect(str_trim(x), "^\\|")] <- "table"
rchunk_start <- c()
rchunk_end <- c()
## find the code chunk start and ends
start_flag <- TRUE
for(i in seq_along(x)){
if(str_detect(x[i], "^```")){
if(start_flag){
rchunk_start <- c(rchunk_start, i)
line_type[i] <- "rchunk_start"
start_flag <- FALSE
} else{
rchunk_end <- c(rchunk_end, i)
line_type[i] <- "rchunk_end"
start_flag <- TRUE
}
}
}
no_code <- which(rchunk_end - rchunk_start==1)
line_type[rchunk_start[no_code]] <- "dont_print"
line_type[rchunk_end[no_code]] <- "dont_print"
rchunk_start <- setdiff(rchunk_start, rchunk_start[no_code])
rchunk_end <- setdiff(rchunk_end, rchunk_end[no_code])
rchunk_size <- rep(0, length(x)) ## used to decide if start new section
if(length(rchunk_start)){
rchunk_size[rchunk_start] <- pmax(1, rchunk_end - rchunk_start - 2)
}
## Check if R chunk is a plot chunk and change if it is
rchunk_inds <- cbind(rchunk_start, rchunk_end)
plot_inds <- which(apply(rchunk_inds, 1, function(ind){
any(str_detect(x[ind[1]:ind[2]], "plot|hist"))
}))
plot_rchunk_start <- rchunk_start[plot_inds]
plot_rchunk_end <- rchunk_end[plot_inds]
line_type[plot_rchunk_start] <- "plot_rchunk_start"
line_type[plot_rchunk_end] <- "plot_rchunk_end"
## find quotes
quote_index <- str_which(x, "^>>.*")
line_type[quote_index] <- "quote"
## find the insider of r chunks
rchunk_size <- rep(0, length(x)) ## used to decide if start new section
rchunk_size[rchunk_start] <- rchunk_end - rchunk_start
for(i in seq_along(rchunk_start)){
ind <- (rchunk_start[i]+1):(rchunk_end[i]-1)
if(length(ind)>0) line_type[ind] <- "rchunk_inside"
}
## find the inside of plot r chunks
for(i in seq_along(plot_rchunk_start)){
ind <- (plot_rchunk_start[i]+1):(plot_rchunk_end[i]-1)
if(length(ind)>0) line_type[ind] <- "plot_rchunk_inside"
}
the_section <- ""
## the start is hard wired
start <- paste0('---\ntitle: "LECTURETITLE"\nauthor: "THEAUTHORNAME"\ndate: "`r lubridate::today()`"\noutput:\n ioslides_presentation:\n fig_caption: no\n fig_height: 5\n fig_width: 7\n out_width: "70%"\n beamer_presentation: default\n slidy_presentation: default\n---\n\n```{r setup, include=FALSE}\nlibrary(tidyverse)\nlibrary(dslabs)\nlibrary(gridExtra)\nlibrary(ggthemes)\nds_theme_set()\noptions(digits = 3)\nknitr::opts_chunk$set(\n comment = "#>",\n collapse = TRUE,\n cache = TRUE,\n out.width = "70%",\n fig.align = "center",\n fig.width = 6,\n fig.asp = 0.618, # 1 / phi\n fig.show = "hold"\n)\n\nimg_path <- "', img.dir,'"\n```')
start <- str_replace(start, "LECTURETITLE", title)
start <- str_replace(start, "THEAUTHORNAME", author)
## start filling in the file
cat(start, file = file_name)
## if there is at lease one exercise section starti filling in file
if(any(line_type=="exercise_start") & save.exercises) cat("", file = exercise_file_name)
exercise_flag <- FALSE
table_flag <- FALSE
table_start <- TRUE
## make last value a new lines
x[length(x)+1] <- "\n"
line_type[length(x)] <- "last_line"
## initialize values
chars <- 0
lines <- 0
page <- 0
## start going line by line
for(i in seq_along(x)){
if(verbose) cat("Page: ", page, ", Line: ", lines, ", Type: ", line_type[i],
", Section: ", the_section, "\n")
## if line is start of section, start section and initialize counts
## and turn of exercise flag (if previously true, exercise section has ended)
if(line_type[i]=="section"){
the_section <- x[i]
exercise_flag <- FALSE
start_section()
} else{
## if exercise start, turn on flag and start just printing out exercises to
## new file
if(line_type[i]=="exercise_start" | exercise_flag){
exercise_flag <- TRUE
if(save.exercises) cat(x[i], "\n", file = exercise_file_name, append = TRUE)
} else{
if(line_type[i]=="table" | table_flag){
if(table_start){
cat("\n", x[i], "\n", sep="", file = file_name, append = TRUE)
table_start <- FALSE
lines <- lines + 3
} else{
if(str_detect(str_trim(x[i+1]), "\\|")){ ##check if next line is table
cat(x[i], "\n", file = file_name, append = TRUE)
lines <- lines + 2
} else{ ##if next line not table, it's the end
cat(x[i], "\n\n", file = file_name, append = TRUE)
table_flag <- FALSE
table_start <- TRUE
lines <- lines + 3
}
}
} else{
## if its a quote add to slides
if(line_type[i] == "quote"){
lines <- lines + ceiling(nchar(x[i])/chars.per.line) + 1
cat(x[i], "\n\n", file = file_name, append = TRUE)
} else{
## R chunks that are not plots are just added to output
if(line_type[i] %in% c("rchunk_end", "rchunk_inside","rchunk_start",
"plot_rchunk_end", "plot_rchunk_inside",
"latex_inside")){
if(str_detect(line_type[i], "inside")) lines <- lines + 1
if(line_type[i] == "latex_inside") lines <- lines + 1 ## add one more for latex
if(line_type[i] == "rchunk_start"){
if(lines + rchunk_size[i] > max.lines) start_section()
}
cat(x[i], "\n", file = file_name, append = TRUE)
if(line_type[i] == "rchunk_end") cat("\n", file = file_name, append = TRUE)
if(line_type[i] == "plot_rchunk_end") start_section()
} else{
## If r chunk includes a plot we will add it twice
## one with eval=FALSE and once with echo=FALSE
## unless the code already specifies it's echo or eval
if(line_type[i] == "plot_rchunk_start"){
## if echo nor eval are defined
## we include the code twice, first with eval=FALSE,
## which is what the while lopp does,
## then after the while loop it adds a sectio header,
##the first line with echo=FALSE, and in the next
## iteration of the i for loop will continue adding the lines
## to see why, look at the previous if statement
if(!str_detect(x[i], "echo|eval")){
y <- str_replace(x[i], "\\}", ", eval=FALSE}")
cat(y, "\n", file = file_name, append = TRUE)
j <- i
while(line_type[j]!="plot_rchunk_end"){
j <- j + 1
cat(x[j], "\n", file = file_name, append = TRUE)
}
start_section()
y <- str_replace(x[i], "```\\{r,*\\s+([\\w|\\-]+)", "```\\{r \\1-run")
y <- str_replace(y, "\\}", ", echo=FALSE}")
cat(y, "\n", file = file_name, append = TRUE)
} else{
if(lines>2) start_section()
cat(x[i], "\n", file = file_name, append = TRUE)
}
} else{
## if we entry is a sentnce, we will split by periods
## and put each sentences as a bullet point
## the first three lines are two avoid spliting
## decimals, and abberviated titles.. might need more
## the trick is to covert points to commans, then convert back
## after the split
if(line_type[i] == "prose"){
x[i] <- str_trim(x[i])
x[i] <- str_replace_all(x[i], "^(\\d+)\\.\\s+", "\\1PERIOD ")
x[i] <- str_replace_all(x[i], "(\\d)\\.(\\d)", "\\1PERIOD\\2")
x[i] <- str_replace_all(x[i], "(Mr|Ms|Dr)\\.", "\\1PERIOD")
y <- str_split(x[i], "\\.\\s+")[[1]]
for(j in seq_along(y)){
## convert back to periods
y[j] <- str_replace_all(y[j], "^(\\d+)PERIOD ", "\\1. ")
y[j] <- str_replace_all(y[j], "(\\d)PERIOD(\\d)", "\\1.\\2")
y[j] <- str_replace_all(y[j], "(Mr|Ms|Dr)PERIOD", "\\1.")
y[j] <- str_trim(y[j])
lines <- lines + ceiling(nchar(y[j])/chars.per.line) + 1
## if we have gone past max lines start a new section
if(lines > max.lines){
start_section(ceiling(nchar(y[j])/chars.per.line) + 1)
}
## add a period at end of bullet point unless we already have
## punctuation
if(!str_sub(y[j], nchar(y[j]), nchar(y[j])) %in% c(".","?",":",",")){
y[j] <- y[j] <- str_c(y[j],".")
}
if(str_detect(y[j], "^\\d+\\.")){
cat(" ", y[j], "\n\n", sep = "",
file = file_name, append = TRUE)
} else{
cat("- ", y[j], "\n\n", sep = "",
file = file_name, append = TRUE)
}
}
} else{
if(line_type[i] == "latex_start"){
if(lines + latex_size[i]*2 > max.lines) start_section()
cat("\n", x[i], "\n", sep = "", file = file_name, append = TRUE)
lines <- lines + 1
} else{
if(line_type[i] == "latex_end"){
cat(x[i], "\n\n", sep = "", file = file_name, append = TRUE)
lines <- lines + 1
} else{
if(line_type[i] == "latex_start_and_end"){
cat("\n", x[i], "\n\n", sep = "", file = file_name, append = TRUE)
lines <- lines + 3
}
}
}
}
}
}
}
}
}
}
}
}