rstudio · gadenbuie · Jan 28, 2022 · Jan 20, 2022 · Jan 20, 2022 · Jan 20, 2022
diff --git a/NEWS.md b/NEWS.md
@@ -50,6 +50,8 @@
 
 -   Users are now warned if their submission contains blanks they are expected to fill in. The default blank pattern is three or more underscores, e.g. `____`. The pattern for blanks can be set with the `exercise.blanks` chunk or tutorial option (@rossellhayes #547).
 
+-   Users are now warned if their submission contains unparsable code caused by non-ASCII characters. This can commonly occur when students copy-and-paste code from a source that applies automatic Unicode formatting to text. If the submission contains Unicode formatted quotation marks (e.g. curly quotes) or dashes, the student is given a suggested replacement with ASCII characters. In other cases, the student is simply prompted to delete the non-ASCII characters and retype them manually. No message is displayed if non-ASCII characters are included in parsable code (e.g. in character strings or valid variable names) (@rossellhayes #642).
+
 -   Authors can choose to reveal (default) or hide the solution to an exercise. Set `exercise.reveal_solution` in the chunk options of a `*-solution` chunk to choose whether or not the solution is revealed to the user. The option can also be set globally with `tutorial_options()`. In a future version of learnr, the default will likely be changed to hide solutions (#402).
 
 -   Feedback messages can now be an `htmltools::tag()`, `htmltools::tagList()`, or a character message (#458) (#458)

diff --git a/R/exercise.R b/R/exercise.R
@@ -918,6 +918,22 @@ exercise_check_code_is_parsable <- function(exercise) {
     }
   }
 
+  unicode_feedback <- exercise_check_unparsable_unicode(exercise$code)
+  if (!is.null(unicode_feedback)) {
+    return(
+      exercise_result(
+        list(
+          message = HTML(unicode_feedback),
+          correct = FALSE,
+          location = "append",
+          type = "error"
+        ),
+        html_output = error_message_html(error$message),
+        error_message = error$message
+      )
+    )
+  }
+
   exercise_result(
     list(
       message = HTML(
@@ -935,6 +951,90 @@ exercise_check_code_is_parsable <- function(exercise) {
   )
 }
 
+# Build feedback about non-ASCII characters found in a learner's submission.
+#
+# This function only inspects the characters in `code`; it does not itself
+# check whether `code` parses.
+#
+# @param code The submitted code. Input that is not a single string is
+#   collapsed into one string, with elements separated by newlines.
+# @return `NULL` when `code` contains only ASCII characters. Otherwise the
+#   value returned by `i18n_span()` for the first of these that applies:
+#   * `text.unparsablequotes`: Unicode quotation marks, with a suggested
+#     replacement that uses straight quotes;
+#   * `text.unparsableunicodesuggestion`: Unicode dashes, with a suggested
+#     replacement that uses the ASCII hyphen-minus;
+#   * `text.unparsableunicode`: any other non-ASCII character, without a
+#     suggestion.
+exercise_check_unparsable_unicode <- function(code) {
+  # The checks below are scalar `if ()` conditions, so make sure we always
+  # work with exactly one string (also covers NULL and zero-length input)
+  if (length(code) != 1) {
+    code <- paste(code, collapse = "\n")
+  }
+
+  non_ascii_pattern <- "[^\\x00-\\x7F]"
+  # Unicode initial (Pi) and final (Pf) quotation punctuation
+  quote_pattern <- "\\p{Pi}|\\p{Pf}"
+  # Double negative: dash punctuation (Pd) other than the ASCII hyphen-minus
+  dash_pattern <- "[^\\P{Pd}-]"
+
+  # Early exit if code is made up of all ASCII characters
+  if (!grepl(non_ascii_pattern, code, perl = TRUE)) {
+    return(NULL)
+  }
+
+  # Build the feedback span for one translation key. The English fallback
+  # text is looked up with the same `key` that names the span, so the two can
+  # never disagree.
+  build_feedback <- function(key, pattern, suggestion = NULL) {
+    opts <- list(
+      character = str_extract(code, pattern, perl = TRUE),
+      code = exercise_highlight_unparsable_unicode(code, pattern)
+    )
+    if (!is.null(suggestion)) {
+      opts$suggestion <- as.character(
+        htmltools::pre(htmltools::code(suggestion))
+      )
+    }
+    # Interpolated values are already HTML, so they must not be escaped again
+    opts$interpolation <- list(escapeValue = FALSE)
+
+    i18n_span(
+      paste0("text.", key),
+      HTML(i18n_translations()$en$translation$text[[key]]),
+      opts = opts
+    )
+  }
+
+  # Check if code contains Unicode quotation marks
+  if (grepl(quote_pattern, code, perl = TRUE)) {
+    # Replace curly single quotes with straight single quotes
+    suggestion <- gsub("[\\x{2018}\\x{2019}]", "'", code, perl = TRUE)
+    # Replace all other Unicode quotes with straight double quotes
+    suggestion <- gsub(quote_pattern, '"', suggestion, perl = TRUE)
+
+    return(build_feedback("unparsablequotes", quote_pattern, suggestion))
+  }
+
+  # Check if code contains Unicode dashes
+  if (grepl(dash_pattern, code, perl = TRUE)) {
+    # Replace Unicode dashes with ASCII hyphen-minus
+    suggestion <- gsub(dash_pattern, "-", code, perl = TRUE)
+
+    return(
+      build_feedback("unparsableunicodesuggestion", dash_pattern, suggestion)
+    )
+  }
+
+  # Return simpler message for any other non-ASCII characters
+  build_feedback("unparsableunicode", non_ascii_pattern)
+}
+
+# Wrap every match of `pattern` in `code` in a `<mark>` tag and return the
+# result as a character string of HTML inside `<pre><code>`.
+#
+# `code` is HTML-escaped before the `<mark>` tags are inserted, so characters
+# such as `<`, `>` and `&` in the learner's code are shown literally instead
+# of being interpreted as markup. `pattern` is therefore matched against the
+# escaped text; this makes no difference for patterns that only match
+# non-ASCII characters.
+#
+# @param code The submitted code.
+# @param pattern A Perl-compatible regular expression for the characters to
+#   highlight. It is wrapped in a capture group before matching.
+# @return A character string of HTML.
+exercise_highlight_unparsable_unicode <- function(code, pattern) {
+  highlighted_code <- gsub(
+    pattern = paste0("(", pattern, ")"),
+    replacement = "<mark>\\1</mark>",
+    x = htmltools::htmlEscape(code),
+    perl = TRUE
+  )
+
+  # The text was escaped above and now carries our own <mark> tags, so mark it
+  # as HTML to keep htmltools from escaping it a second time
+  as.character(
+    htmltools::pre(htmltools::code(htmltools::HTML(highlighted_code)))
+  )
+}
+
 exercise_result_timeout <- function() {
   exercise_result_error(
     "Error: Your code ran longer than the permitted timelimit for this exercise.",

diff --git a/R/utils.R b/R/utils.R
@@ -95,6 +95,9 @@ str_replace <- function(x, pattern, replacement) {
 # Remove text matching `pattern` from `x` by replacing it with the empty
 # string via str_replace().
 # NOTE(review): whether only the first match or every match is removed
 # depends on str_replace(), whose body is not visible here -- confirm.
 str_remove <- function(x, pattern) {
   str_replace(x, pattern, "")
 }
+# Extract the first match of `pattern` from each element of `x`.
+#
+# Extra arguments in `...` are forwarded to regexpr() (e.g. `perl = TRUE`).
+# Elements of `x` without a match contribute nothing to the result, so the
+# returned character vector can be shorter than `x` and is empty when nothing
+# matches.
+str_extract <- function(x, pattern, ...) {
+  first_match <- regexpr(pattern, x, ...)
+  unlist(regmatches(x, first_match))
+}
 
 is_tags <- function(x) {
   inherits(x, "shiny.tag") ||

diff --git a/data-raw/i18n_translations.yml b/data-raw/i18n_translations.yml
@@ -408,6 +408,62 @@ text:
       또는 <code>&quot;</code>, <code>'</code>, <code>(</code>
       , <code>{</code>로 시작하는 구문을 닫는 <code>&quot;</code>, <code>'</code>,
       <code>)</code>, <code>}</code>을 잊었을 수도 있습니다.
+  unparsablequotes:
+    en: >
+      It looks like your R code contains specially formatted quotation marks
+      or &quot;curly&quot; quotes (<code>{{character}}</code>)
+      around character strings, making your code invalid.
+      R requires character values to be contained in straight quotation
+      marks (<code>&quot;</code> or <code>'</code>).
+      {{code}}
+      Don't worry, this is a common source of errors when you copy code from
+      another app that applies its own formatting to text.
+      You can try replacing your input with this code:
+      {{suggestion}}
+    fr: ~
+    es: ~
+    pt: ~
+    tr: ~
+    emo: ~
+    eu: ~
+    de: ~
+    ko: ~
+  unparsableunicode:
+    en: >
+      It looks like your R code contains an unexpected special character
+      (<code>{{character}}</code>) that makes your code invalid.
+      {{code}}
+      Sometimes your code may contain a special character that looks like a
+      regular character, especially if you copy and paste the code from
+      another app.
+      Try deleting the special character from your code and retyping
+      it manually.
+    fr: ~
+    es: ~
+    pt: ~
+    tr: ~
+    emo: ~
+    eu: ~
+    de: ~
+    ko: ~
+  unparsableunicodesuggestion:
+    en: >
+      It looks like your R code contains an unexpected special character
+      (<code>{{character}}</code>) that makes your code invalid.
+      {{code}}
+      Sometimes your code may contain a special character that looks like a
+      regular character, especially if you copy and paste the code from
+      another app.
+      You can try replacing your input with this code:
+      {{suggestion}}
+    fr: ~
+    es: ~
+    pt: ~
+    tr: ~
+    emo: ~
+    eu: ~
+    de: ~
+    ko: ~
   and:
     en: "and"
     fr: "et"

diff --git a/inst/internals/i18n_random_phrases.rds b/inst/internals/i18n_random_phrases.rds
diff --git a/inst/internals/i18n_translations.rds b/inst/internals/i18n_translations.rds
diff --git a/tests/testthat/test-exercise.R b/tests/testthat/test-exercise.R
@@ -1037,6 +1037,56 @@ test_that("Errors with global setup code result in an internal error", {
   expect_match(conditionMessage(res$feedback$error), "boom")
 })
 
+# Unparsable Unicode ------------------------------------------------------
+
+test_that("evaluate_exercise() returns message for unparsable non-ASCII code", {
+  # Each submission must produce the feedback registered under `key`: the
+  # message has to carry the i18n key itself and the English fallback text
+  # stored under that same key.
+  expect_unicode_feedback <- function(user_code, key) {
+    ex     <- mock_exercise(user_code = user_code)
+    result <- evaluate_exercise(ex, new.env())
+    expect_equal(result$feedback, exercise_check_code_is_parsable(ex)$feedback)
+    expect_match(result$feedback$message, paste0("text.", key))
+    expect_match(
+      result$feedback$message,
+      i18n_translations()$en$translation$text[[key]],
+      fixed = TRUE
+    )
+  }
+
+  # Curly double quotes
+  expect_unicode_feedback('str_detect(“test”, “t.+t”)', "unparsablequotes")
+
+  # Curly single quotes
+  expect_unicode_feedback('str_detect(‘test’, ‘t.+t’)', "unparsablequotes")
+
+  # Unicode dash: a replacement is suggested
+  expect_unicode_feedback('63 – 21', "unparsableunicodesuggestion")
+
+  # Any other non-ASCII character: no replacement is suggested
+  expect_unicode_feedback('63 ± 21', "unparsableunicode")
+})
+
+test_that("evaluate_exercise() does not return a message for parsable non-ASCII code", {
+  skip_on_os("windows")
+
+  # Non-ASCII characters in a variable name and inside a string still parse,
+  # so no feedback is expected
+  ex <- mock_exercise(user_code = 'μεταβλητή <- "What‽"')
+  expect_null(evaluate_exercise(ex, new.env())$feedback)
+})
 
 # Timelimit ---------------------------------------------------------------