From f08c75e5e0644320fc32cb2d78d0d46216961765 Mon Sep 17 00:00:00 2001 From: Eyal Kalderon Date: Thu, 20 May 2021 12:49:38 -0400 Subject: [PATCH 1/2] Eliminate looping, message reparsing in codec with take_until This commit changes `LanguageServerCodec` to recover from parse failures more efficiently. Using nom's SIMD-accelerated `take_until()` combinator, we can skip forward in the byte stream much more efficiently and without any message reparsing nor looping necessary. --- src/codec.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/codec.rs b/src/codec.rs index b0221133..643937ac 100644 --- a/src/codec.rs +++ b/src/codec.rs @@ -10,9 +10,9 @@ use bytes::buf::BufMut; use bytes::{Buf, BytesMut}; use log::trace; use nom::branch::alt; -use nom::bytes::streaming::{is_not, tag}; +use nom::bytes::streaming::{is_not, tag, take_until}; use nom::character::streaming::{char, crlf, digit1, space0}; -use nom::combinator::{map_res, opt}; +use nom::combinator::{map, map_res, opt}; use nom::error::ErrorKind; use nom::multi::length_data; use nom::sequence::{delimited, terminated, tuple}; @@ -145,16 +145,18 @@ impl Decoder for LanguageServerCodec { } Err(Err::Error(err)) | Err(Err::Failure(err)) => { let code = err.code; - loop { - use ParseError::*; - match parse_message(src) { - Err(_) if !src.is_empty() => src.advance(1), - _ => match code { - ErrorKind::Digit | ErrorKind::MapRes => return Err(InvalidLength), - ErrorKind::Char | ErrorKind::IsNot => return Err(InvalidType), - _ => return Err(MissingHeader), - }, - } + let parsed_bytes = src.len() - err.input.len(); + src.advance(parsed_bytes); + + match find_next_message(src) { + Ok((_, position)) => src.advance(position), + Err(_) => src.advance(src.len()), + } + + match code { + ErrorKind::Digit | ErrorKind::MapRes => return Err(ParseError::InvalidLength), + ErrorKind::Char | ErrorKind::IsNot => return Err(ParseError::InvalidType), + _ => return Err(ParseError::MissingHeader), } } }; @@ -193,6 +195,10 @@ fn parse_message(input: &[u8]) -> IResult<&[u8], &[u8]> { message(input) } +fn find_next_message(input: &[u8]) -> IResult<&[u8], usize> { + map(take_until("Content-Length"), |s: &[u8]| s.len())(input) +} + #[cfg(test)] mod tests { use bytes::BytesMut; From c537596704cbe374bbdcf8cecab99ce3b4ce401f Mon Sep 17 00:00:00 2001 From: Eyal Kalderon Date: Thu, 20 May 2021 13:24:54 -0400 Subject: [PATCH 2/2] Improve codec test coverage This commit adds a helper function for encoding test LSP messages and expands codec test coverage, checking for more parse errors. --- src/codec.rs | 50 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/src/codec.rs b/src/codec.rs index 643937ac..13900e25 100644 --- a/src/codec.rs +++ b/src/codec.rs @@ -206,10 +206,23 @@ mod tests { use super::*; + fn encode_message(content_type: Option<&str>, message: &str) -> String { + let content_type = content_type + .map(|ty| format!("\r\nContent-Type: {}", ty)) + .unwrap_or_default(); + + format!( + "Content-Length: {}{}\r\n\r\n{}", + message.len(), + content_type, + message + ) + } + #[test] fn encode_and_decode() { - let decoded = r#"{"jsonrpc":"2.0","method":"exit"}"#.to_string(); - let encoded = format!("Content-Length: {}\r\n\r\n{}", decoded.len(), decoded); + let decoded = r#"{"jsonrpc":"2.0","method":"exit"}"#; + let encoded = encode_message(None, &decoded); let mut codec = LanguageServerCodec::default(); let mut buffer = BytesMut::new(); @@ -225,10 +238,9 @@ mod tests { #[test] fn decodes_optional_content_type() { - let decoded = r#"{"jsonrpc":"2.0","method":"exit"}"#.to_string(); - let content_len = format!("Content-Length: {}", decoded.len()); - let content_type = "Content-Type: application/vscode-jsonrpc; charset=utf-8".to_string(); - let encoded = format!("{}\r\n{}\r\n\r\n{}", content_len, content_type, decoded); + let decoded = r#"{"jsonrpc":"2.0","method":"exit"}"#; + let content_type = "application/vscode-jsonrpc; charset=utf-8"; + let encoded = encode_message(Some(content_type), decoded); let mut codec = LanguageServerCodec::default(); let mut buffer = BytesMut::from(encoded.as_str()); @@ -239,8 +251,8 @@ mod tests { #[test] fn decodes_zero_length_message() { - let content_type = "Content-Type: application/vscode-jsonrpc; charset=utf-8".to_string(); - let encoded = format!("Content-Length: 0\r\n{}\r\n\r\n", content_type); + let content_type = "Content-Type: application/vscode-jsonrpc; charset=utf-8"; + let encoded = encode_message(Some(content_type), ""); let mut codec = LanguageServerCodec::default(); let mut buffer = BytesMut::from(encoded.as_str()); @@ -250,9 +262,9 @@ mod tests { #[test] fn recovers_from_parse_error() { - let decoded = r#"{"jsonrpc":"2.0","method":"exit"}"#.to_string(); - let encoded = format!("Content-Length: {}\r\n\r\n{}", decoded.len(), decoded); - let mixed = format!("1234567890abcdefgh{}", encoded); + let decoded = r#"{"jsonrpc":"2.0","method":"exit"}"#; + let encoded = encode_message(None, decoded); + let mixed = format!("foobar{}Content-Length: foobar\r\n\r\n{}", encoded, encoded); let mut codec = LanguageServerCodec::default(); let mut buffer = BytesMut::from(mixed.as_str()); @@ -262,8 +274,20 @@ mod tests { other => panic!("expected `Err(ParseError::MissingHeader)`, got {:?}", other), } + let message: Option = codec.decode(&mut buffer).unwrap(); + let first_valid = serde_json::from_str(&decoded).unwrap(); + assert_eq!(message, Some(first_valid)); + + match codec.decode(&mut buffer) { + Err(ParseError::InvalidLength) => {} + other => panic!("expected `Err(ParseError::InvalidLength)`, got {:?}", other), + } + let message = codec.decode(&mut buffer).unwrap(); - let decoded: Value = serde_json::from_str(&decoded).unwrap(); - assert_eq!(message, Some(decoded)); + let second_valid = serde_json::from_str(&decoded).unwrap(); + assert_eq!(message, Some(second_valid)); + + let message = codec.decode(&mut buffer).unwrap(); + assert_eq!(message, None); } }