/// Decodes the C-style escape sequences in `s` and returns the resulting bytes.
///
/// Based on [`google::protobuf::UnescapeCEscapeString`][1].
///
/// Bytes that are not part of an escape sequence are copied through unchanged. The
/// recognised escapes are:
///
/// * `\a`, `\b`, `\f`, `\n`, `\r`, `\t`, `\v`, `\\`, `\?`, `\'` and `\"`;
/// * `\` followed by one to three octal digits;
/// * `\x` or `\X` followed by exactly two hexadecimal digits.
///
/// # Panics
///
/// Panics if `s` ends with a lone backslash, contains an unknown escape, contains an
/// octal escape whose value exceeds `0xFF`, or contains a hex escape that is not
/// followed by two hexadecimal digits.
///
/// [1]: https://github.com/google/protobuf/blob/3.3.x/src/google/protobuf/stubs/strutil.cc#L312-L322
pub(crate) fn unescape_c_escape_string(s: &str) -> Vec<u8> {
    let src = s.as_bytes();
    let len = src.len();
    // Every escape consumes at least two input bytes per output byte, so the input
    // length is an upper bound on the output length.
    let mut dst = Vec::with_capacity(len);

    let mut p = 0;
    while p < len {
        let byte = src[p];
        p += 1;
        if byte != b'\\' {
            dst.push(byte);
            continue;
        }

        // `p` now indexes the byte that selects the kind of escape.
        let escape = match src.get(p) {
            Some(&b) => b,
            None => invalid(s, "ends with '\\'"),
        };

        match escape {
            b'0'..=b'7' => {
                // Accumulate in a wider type: three octal digits can encode up to
                // 0o777, which does not fit in a byte.
                let mut value: u32 = 0;
                let mut digits = 0;
                while digits < 3 && p < len && (b'0'..=b'7').contains(&src[p]) {
                    value = value * 8 + u32::from(src[p] - b'0');
                    p += 1;
                    digits += 1;
                }
                if value > u32::from(u8::MAX) {
                    invalid(s, "octal value out of range");
                }
                // Lossless: the range was checked just above.
                dst.push(value as u8);
            }
            b'x' | b'X' => {
                // Work on bytes rather than on `str` slices so that non-ASCII input
                // after `\x` is reported as invalid instead of tripping a
                // char-boundary panic.
                let (hi, lo) = match src.get(p + 1..p + 3) {
                    Some(&[hi, lo]) => (hi, lo),
                    _ => invalid(s, "incomplete hex value"),
                };
                match (hex_digit(hi), hex_digit(lo)) {
                    (Some(hi), Some(lo)) => dst.push((hi << 4) | lo),
                    _ => invalid(s, "invalid hex value"),
                }
                p += 3;
            }
            _ => {
                let decoded = match escape {
                    b'a' => 0x07,
                    b'b' => 0x08,
                    b'f' => 0x0C,
                    b'n' => 0x0A,
                    b'r' => 0x0D,
                    b't' => 0x09,
                    b'v' => 0x0B,
                    // These escapes stand for the character itself.
                    b'\\' | b'?' | b'\'' | b'"' => escape,
                    _ => invalid(s, "invalid escape"),
                };
                dst.push(decoded);
                p += 1;
            }
        }
    }
    dst
}

/// Returns the numeric value of the ASCII hexadecimal digit `b`, or `None` if `b` is
/// not a hexadecimal digit.
fn hex_digit(b: u8) -> Option<u8> {
    match b {
        b'0'..=b'9' => Some(b - b'0'),
        b'a'..=b'f' => Some(b - b'a' + 10),
        b'A'..=b'F' => Some(b - b'A' + 10),
        _ => None,
    }
}

/// Panics with the shared "invalid c-escaped default binary value" message for the
/// input `s`, followed by `reason`.
fn invalid(s: &str, reason: &str) -> ! {
    panic!(
        "invalid c-escaped default binary value ({}): {}",
        s, reason
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_unescape_c_escape_string() {
        assert_eq!(
            &b"hello world"[..],
            &unescape_c_escape_string("hello world")[..]
        );

        assert_eq!(&b"\0"[..], &unescape_c_escape_string(r#"\0"#)[..]);

        assert_eq!(
            &[0o012, 0o156],
            &unescape_c_escape_string(r#"\012\156"#)[..]
        );
        assert_eq!(&[0x01, 0x02], &unescape_c_escape_string(r#"\x01\x02"#)[..]);

        assert_eq!(
            &b"\0\x01\x07\x08\x0C\n\r\t\x0B\\\'\"\xFE"[..],
            &unescape_c_escape_string(r#"\0\001\a\b\f\n\r\t\v\\\'\"\xfe"#)[..]
        );
    }

    #[test]
    fn test_unescape_c_escape_string_octal_limits() {
        // The largest value that fits in a byte is accepted.
        assert_eq!(&[0xFF], &unescape_c_escape_string(r#"\377"#)[..]);
        // At most three digits belong to the escape; the fourth is a literal.
        assert_eq!(&[0o123, b'4'], &unescape_c_escape_string(r#"\1234"#)[..]);
    }

    #[test]
    #[should_panic(expected = "incomplete hex value")]
    fn test_unescape_c_escape_string_incomplete_hex_value() {
        unescape_c_escape_string(r#"\x1"#);
    }

    #[test]
    #[should_panic(expected = "invalid hex value")]
    fn test_unescape_c_escape_string_signed_hex_value() {
        unescape_c_escape_string(r#"\x+1"#);
    }

    #[test]
    #[should_panic(expected = "invalid hex value")]
    fn test_unescape_c_escape_string_non_ascii_hex_value() {
        unescape_c_escape_string("\\x\u{e9}");
    }

    #[test]
    #[should_panic(expected = "octal value out of range")]
    fn test_unescape_c_escape_string_octal_out_of_range() {
        unescape_c_escape_string(r#"\400"#);
    }

    #[test]
    #[should_panic(expected = "ends with '\\'")]
    fn test_unescape_c_escape_string_trailing_backslash() {
        unescape_c_escape_string("abc\\");
    }

    #[test]
    #[should_panic(expected = "invalid escape")]
    fn test_unescape_c_escape_string_unknown_escape() {
        unescape_c_escape_string(r#"\q"#);
    }
}