Split inputs by newlines (#536)

* Add `lines()` util to preserve trailing lines * Draft `handle_input()` * WIP handling for pending line in `read_console()` * Fix output of intermediate expressions * Make detection of prompt types more robust * Rework `lines()` to return an iterator * Flesh out safety bullet * Clean up `buffer_overflow_call()` * Remove comment * Update comment about incomplete input handling * Throw an R error if we see incomplete user inputs Should only happen in Jupyter Notebooks. Positron should always check for complete inputs before sending them through. * Add a note about input replies and incomplete results * Add more comments about input handling * Return error from `check_console_input()` * Document the ReadConsole state machine * Panic when an incomplete prompt is detected in `ReadConsole` * Document current issue with large input errors * Tweak tests * Set `R_CLI_HIDE_CURSOR` after all * Return an `amalthea::Result<()>` from `on_console_input()` Allowing us to manually throw an R error from the POD site * We are happy with this being an error * Update some documentation * Clean up long single line test * Add basic stdin test, and stdin buffer overflow test * Turn off stack limit on Windows too And add tests that its actually off during harp/ark `r_task()` tests and frontend integration tests * Collect IOPub Stream messages in batches * Correct ordering in `report_error!` * Group structs and impls together * Also test for prefix stream * Mention fix in changelog * Add tests for incompete inputs and browser prompts * Improve readline prompt detection in browser sessions posit-dev/positron#4742 * Add test for errors in browser * Update comments * Add test for readline in browser sessions * Move `r_task()` initialization after R was set up --------- Co-authored-by: Lionel Henry <lionel.hry@proton.me> Co-authored-by: Davis Vaughan <davis@posit.co>
posit-dev · Oct 4, 2024 · 3663460 · 3663460
1 parent 0a06ddf
commit 3663460
Show file tree

Hide file tree

Showing 15 changed files with 858 additions and 115 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 # 0.1.9000
 
+- Sending long inputs of more than 4096 bytes no longer fails (posit-dev/positron#4745).
+
 - Jupyter: Fixed a bug in the kernel-info reply where the `pygments_lexer` field
   would be set incorrectly to `""` (#553).
 

diff --git a/crates/amalthea/src/error.rs b/crates/amalthea/src/error.rs
@@ -43,6 +43,7 @@ pub enum Error {
     UnknownCommId(String),
     InvalidCommMessage(String, String, String),
     InvalidInputRequest(String),
+    InvalidConsoleInput(String),
 }
 
 impl std::error::Error for Error {}
@@ -193,6 +194,9 @@ impl fmt::Display for Error {
             Error::InvalidInputRequest(message) => {
                 write!(f, "{message}")
             },
+            Error::InvalidConsoleInput(message) => {
+                write!(f, "{message}")
+            },
         }
     }
 }

diff --git a/crates/amalthea/src/fixtures/dummy_frontend.rs b/crates/amalthea/src/fixtures/dummy_frontend.rs
@@ -13,6 +13,7 @@ use crate::session::Session;
 use crate::socket::socket::Socket;
 use crate::wire::execute_input::ExecuteInput;
 use crate::wire::execute_request::ExecuteRequest;
+use crate::wire::input_reply::InputReply;
 use crate::wire::jupyter_message::JupyterMessage;
 use crate::wire::jupyter_message::Message;
 use crate::wire::jupyter_message::ProtocolMessage;
@@ -36,6 +37,10 @@ pub struct DummyFrontend {
     heartbeat_port: u16,
 }
 
+pub struct ExecuteRequestOptions {
+    pub allow_stdin: bool,
+}
+
 impl DummyFrontend {
     pub fn new() -> Self {
         use rand::Rng;
@@ -139,13 +144,13 @@ impl DummyFrontend {
         id
     }
 
-    pub fn send_execute_request(&self, code: &str) -> String {
+    pub fn send_execute_request(&self, code: &str, options: ExecuteRequestOptions) -> String {
         self.send_shell(ExecuteRequest {
             code: String::from(code),
             silent: false,
             store_history: true,
             user_expressions: serde_json::Value::Null,
-            allow_stdin: false,
+            allow_stdin: options.allow_stdin,
             stop_on_error: false,
         })
     }
@@ -248,22 +253,48 @@ impl DummyFrontend {
         })
     }
 
-    pub fn recv_iopub_stream_stdout(&self) -> String {
-        let msg = self.recv_iopub();
-
-        assert_matches!(msg, Message::StreamOutput(data) => {
-            assert_eq!(data.content.name, Stream::Stdout);
-            data.content.text
-        })
+    pub fn recv_iopub_stream_stdout(&self, expect: &str) {
+        self.recv_iopub_stream(expect, Stream::Stdout)
     }
 
-    pub fn recv_iopub_stream_stderr(&self) -> String {
-        let msg = self.recv_iopub();
+    pub fn recv_iopub_stream_stderr(&self, expect: &str) {
+        self.recv_iopub_stream(expect, Stream::Stderr)
+    }
 
-        assert_matches!(msg, Message::StreamOutput(data) => {
-            assert_eq!(data.content.name, Stream::Stderr);
-            data.content.text
-        })
+    /// Receive from IOPub Stream
+    ///
+    /// Stdout and Stderr Stream messages are buffered, so to reliably test against them
+    /// we have to collect the messages in batches on the receiving end and compare against
+    /// an expected message.
+    fn recv_iopub_stream(&self, expect: &str, stream: Stream) {
+        let mut out = String::new();
+
+        loop {
+            // Receive a piece of stream output (with a timeout)
+            let msg = self.recv_iopub();
+
+            // Assert its type
+            let piece = assert_matches!(msg, Message::StreamOutput(data) => {
+                assert_eq!(data.content.name, stream);
+                data.content.text
+            });
+
+            // Add to what we've already collected
+            out += piece.as_str();
+
+            if out == expect {
+                // Done, found the entire `expect` string
+                return;
+            }
+
+            if !expect.starts_with(out.as_str()) {
+                // Something is wrong, message doesn't match up
+                panic!("Expected IOPub stream of '{expect}'. Actual stream of '{out}'.");
+            }
+
+            // We have a prefix of `expect`, but not the whole message yet.
+            // Wait on the next IOPub Stream message.
+        }
     }
 
     /// Receive from IOPub and assert ExecuteResult message. Returns compulsory
@@ -276,6 +307,21 @@ impl DummyFrontend {
         })
     }
 
+    /// Receive from Stdin and assert `InputRequest` message.
+    /// Returns the `prompt`.
+    pub fn recv_stdin_input_request(&self) -> String {
+        let msg = self.recv_stdin();
+
+        assert_matches!(msg, Message::InputRequest(data) => {
+            data.content.prompt
+        })
+    }
+
+    /// Send back an `InputReply` to an `InputRequest` over Stdin
+    pub fn send_stdin_input_reply(&self, value: String) {
+        self.send_stdin(InputReply { value })
+    }
+
     /// Receives a (raw) message from the heartbeat socket
     pub fn recv_heartbeat(&self) -> zmq::Message {
         let mut msg = zmq::Message::new();
@@ -339,3 +385,9 @@ impl DummyFrontend {
         }
     }
 }
+
+impl Default for ExecuteRequestOptions {
+    fn default() -> Self {
+        Self { allow_stdin: false }
+    }
+}
diff --git a/crates/amalthea/src/kernel.rs b/crates/amalthea/src/kernel.rs
@@ -38,7 +38,7 @@ use crate::wire::jupyter_message::Message;
 use crate::wire::jupyter_message::OutboundMessage;
 
 macro_rules! report_error {
-    ($($arg:tt)+) => (if cfg!(debug_assertions) { log::error!($($arg)+) } else { panic!($($arg)+) })
+    ($($arg:tt)+) => (if cfg!(debug_assertions) { panic!($($arg)+) } else { log::error!($($arg)+) })
 }
 
 /// A Kernel represents a unique Jupyter kernel session and is the host for all

diff --git a/crates/ark/src/analysis/input_boundaries.rs b/crates/ark/src/analysis/input_boundaries.rs
@@ -65,20 +65,13 @@ impl InputBoundary {
 ///   an invalid one, and invalid inputs are always trailing).
 /// - There is only one incomplete and one invalid input in a set of inputs.
 pub fn input_boundaries(text: &str) -> anyhow::Result<Vec<InputBoundary>> {
-    let mut lines: Vec<&str> = text.lines().collect();
-
-    // Rectify for `lines()` ignoring trailing empty lines
-    match text.chars().last() {
-        Some(last) if last == '\n' => lines.push(""),
-        None => lines.push(""),
-        _ => {},
-    }
-
-    // Create a duplicate vector of lines on the R side too so we don't have to
-    // reallocate memory each time we parse a new subset of lines
-    let lines_r = CharacterVector::create(lines.iter());
+    let lines: Vec<&str> = crate::strings::lines(text).collect();
     let n_lines: u32 = lines.len().try_into()?;
 
+    // Create the vector on the R side so we don't have to reallocate memory
+    // for the elements each time we parse a new subset of lines
+    let lines = CharacterVector::create(lines);
+
     let mut complete: Vec<LineRange> = vec![];
     let mut incomplete: Option<LineRange> = None;
     let mut invalid: Option<LineRange> = None;
@@ -120,9 +113,9 @@ pub fn input_boundaries(text: &str) -> anyhow::Result<Vec<InputBoundary>> {
         // Grab all code up to current line. We don't slice the vector in the
         // first iteration as it's not needed.
         let subset = if current_line == n_lines - 1 {
-            lines_r.clone()
+            lines.clone()
         } else {
-            CharacterVector::try_from(&lines_r.slice()[..=current_line as usize])?
+            CharacterVector::try_from(&lines.slice()[..=current_line as usize])?
         };
 
         // Parse within source file to get source references

diff --git a/crates/ark/src/fixtures/dummy_frontend.rs b/crates/ark/src/fixtures/dummy_frontend.rs
@@ -55,6 +55,13 @@ impl DummyArkFrontend {
         let frontend = DummyFrontend::new();
         let connection_file = frontend.get_connection_file();
 
+        // We don't want cli to try and restore the cursor, it breaks our tests
+        // by adding unecessary ANSI escapes. We don't need this in Positron because
+        // cli also checks `isatty(stdout())`, which is false in Positron because
+        // we redirect stdout.
+        // https://github.com/r-lib/cli/blob/1220ed092c03e167ff0062e9839c81d7258a4600/R/onload.R#L33-L40
+        unsafe { std::env::set_var("R_CLI_HIDE_CURSOR", "false") };
+
         // Start the kernel in this thread so that panics are propagated
         crate::start::start_kernel(
             connection_file,