Skip to content

Commit

Permalink
feat: show printable byte arrays as byte strings in SSA (#6709)
Browse files Browse the repository at this point in the history
  • Loading branch information
asterite authored Dec 5, 2024
1 parent 37a4996 commit fc11b63
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 22 deletions.
12 changes: 6 additions & 6 deletions compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -842,27 +842,27 @@ mod tests {

#[test]
fn simplify_derive_generators_has_correct_type() {
let src = "
let src = r#"
brillig(inline) fn main f0 {
b0():
v0 = make_array [u8 68, u8 69, u8 70, u8 65, u8 85, u8 76, u8 84, u8 95, u8 68, u8 79, u8 77, u8 65, u8 73, u8 78, u8 95, u8 83, u8 69, u8 80, u8 65, u8 82, u8 65, u8 84, u8 79, u8 82] : [u8; 24]
v0 = make_array b"DEFAULT_DOMAIN_SEPARATOR"
// This call was previously incorrectly simplified to something that returned `[Field; 3]`
v2 = call derive_pedersen_generators(v0, u32 0) -> [(Field, Field, u1); 1]
return v2
}
";
"#;
let ssa = Ssa::from_str(src).unwrap();

let expected = "
let expected = r#"
brillig(inline) fn main f0 {
b0():
v15 = make_array [u8 68, u8 69, u8 70, u8 65, u8 85, u8 76, u8 84, u8 95, u8 68, u8 79, u8 77, u8 65, u8 73, u8 78, u8 95, u8 83, u8 69, u8 80, u8 65, u8 82, u8 65, u8 84, u8 79, u8 82] : [u8; 24]
v15 = make_array b"DEFAULT_DOMAIN_SEPARATOR"
v19 = make_array [Field 3728882899078719075161482178784387565366481897740339799480980287259621149274, Field -9903063709032878667290627648209915537972247634463802596148419711785767431332, u1 0] : [(Field, Field, u1); 1]
return v19
}
";
"#;
assert_normalized_ssa_equals(ssa, expected);
}
}
44 changes: 44 additions & 0 deletions compiler/noirc_evaluator/src/ssa/ir/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ use std::{
};

use acvm::acir::AcirField;
use im::Vector;
use iter_extended::vecmap;

use crate::ssa::ir::types::{NumericType, Type};

use super::{
basic_block::BasicBlockId,
dfg::DataFlowGraph,
Expand Down Expand Up @@ -220,6 +223,28 @@ fn display_instruction_inner(
)
}
Instruction::MakeArray { elements, typ } => {
// If the array is a byte array, we check if all the bytes are printable ascii characters
// and, if so, we print the array as a string literal (easier to understand).
// It could happen that the byte array is a random byte sequence that happens to be printable
// (it didn't come from a string literal) but this still reduces the noise in the output
// and actually represents the same value.
let (element_types, is_slice) = match typ {
Type::Array(types, _) => (types, false),
Type::Slice(types) => (types, true),
_ => panic!("Expected array or slice type for MakeArray"),
};
if element_types.len() == 1
&& element_types[0] == Type::Numeric(NumericType::Unsigned { bit_size: 8 })
{
if let Some(string) = try_byte_array_to_string(elements, function) {
if is_slice {
return writeln!(f, "make_array &b{:?}", string);
} else {
return writeln!(f, "make_array b{:?}", string);
}
}
}

write!(f, "make_array [")?;

for (i, element) in elements.iter().enumerate() {
Expand All @@ -234,6 +259,25 @@ fn display_instruction_inner(
}
}

fn try_byte_array_to_string(elements: &Vector<ValueId>, function: &Function) -> Option<String> {
let mut string = String::new();
for element in elements {
let element = function.dfg.get_numeric_constant(*element)?;
let element = element.try_to_u32()?;
if element > 0xFF {
return None;
}
let byte = element as u8;
if byte.is_ascii_alphanumeric() || byte.is_ascii_punctuation() || byte.is_ascii_whitespace()
{
string.push(byte as char);
} else {
return None;
}
}
Some(string)
}

fn result_types(function: &Function, results: &[ValueId]) -> String {
let types = vecmap(results, |result| function.dfg.type_of_value(*result).to_string());
if types.is_empty() {
Expand Down
24 changes: 18 additions & 6 deletions compiler/noirc_evaluator/src/ssa/parser/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ impl<'a> Lexer<'a> {
Some('-') if self.peek_char() == Some('>') => self.double_char_token(Token::Arrow),
Some('-') => self.single_char_token(Token::Dash),
Some('"') => self.eat_string_literal(),
Some('b') if self.peek_char() == Some('"') => self.eat_byte_string_literal(),
Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch),
Some(char) => Err(LexerError::UnexpectedCharacter {
char,
Expand Down Expand Up @@ -180,8 +181,23 @@ impl<'a> Lexer<'a> {

fn eat_string_literal(&mut self) -> SpannedTokenResult {
let start = self.position;
let mut string = String::new();
let string = self.eat_string(start)?;
let str_literal_token = Token::Str(string);
let end = self.position;
Ok(str_literal_token.into_span(start, end))
}

fn eat_byte_string_literal(&mut self) -> SpannedTokenResult {
let start = self.position;
self.next_char(); // skip the b
let string = self.eat_string(start)?;
let str_literal_token = Token::ByteStr(string);
let end = self.position;
Ok(str_literal_token.into_span(start, end))
}

fn eat_string(&mut self, start: u32) -> Result<String, LexerError> {
let mut string = String::new();
while let Some(next) = self.next_char() {
let char = match next {
'"' => break,
Expand All @@ -206,11 +222,7 @@ impl<'a> Lexer<'a> {

string.push(char);
}

let str_literal_token = Token::Str(string);

let end = self.position;
Ok(str_literal_token.into_span(start, end))
Ok(string)
}

fn eat_while<F: Fn(char) -> bool>(
Expand Down
66 changes: 59 additions & 7 deletions compiler/noirc_evaluator/src/ssa/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ use std::{
};

use super::{
ir::{instruction::BinaryOp, types::Type},
ir::{
instruction::BinaryOp,
types::{NumericType, Type},
},
Ssa,
};

Expand Down Expand Up @@ -448,12 +451,39 @@ impl<'a> Parser<'a> {
}

if self.eat_keyword(Keyword::MakeArray)? {
self.eat_or_error(Token::LeftBracket)?;
let elements = self.parse_comma_separated_values()?;
self.eat_or_error(Token::RightBracket)?;
self.eat_or_error(Token::Colon)?;
let typ = self.parse_type()?;
return Ok(ParsedInstruction::MakeArray { target, elements, typ });
if self.eat(Token::Ampersand)? {
let Some(string) = self.eat_byte_str()? else {
return self.expected_byte_string();
};
let u8 = Type::Numeric(NumericType::Unsigned { bit_size: 8 });
let typ = Type::Slice(Arc::new(vec![u8.clone()]));
let elements = string
.bytes()
.map(|byte| ParsedValue::NumericConstant {
constant: FieldElement::from(byte as u128),
typ: u8.clone(),
})
.collect();
return Ok(ParsedInstruction::MakeArray { target, elements, typ });
} else if let Some(string) = self.eat_byte_str()? {
let u8 = Type::Numeric(NumericType::Unsigned { bit_size: 8 });
let typ = Type::Array(Arc::new(vec![u8.clone()]), string.len() as u32);
let elements = string
.bytes()
.map(|byte| ParsedValue::NumericConstant {
constant: FieldElement::from(byte as u128),
typ: u8.clone(),
})
.collect();
return Ok(ParsedInstruction::MakeArray { target, elements, typ });
} else {
self.eat_or_error(Token::LeftBracket)?;
let elements = self.parse_comma_separated_values()?;
self.eat_or_error(Token::RightBracket)?;
self.eat_or_error(Token::Colon)?;
let typ = self.parse_type()?;
return Ok(ParsedInstruction::MakeArray { target, elements, typ });
}
}

if self.eat_keyword(Keyword::Not)? {
Expand Down Expand Up @@ -796,6 +826,18 @@ impl<'a> Parser<'a> {
}
}

fn eat_byte_str(&mut self) -> ParseResult<Option<String>> {
if matches!(self.token.token(), Token::ByteStr(..)) {
let token = self.bump()?;
match token.into_token() {
Token::ByteStr(string) => Ok(Some(string)),
_ => unreachable!(),
}
} else {
Ok(None)
}
}

fn eat(&mut self, token: Token) -> ParseResult<bool> {
if self.token.token() == &token {
self.bump()?;
Expand Down Expand Up @@ -848,6 +890,13 @@ impl<'a> Parser<'a> {
})
}

fn expected_byte_string<T>(&mut self) -> ParseResult<T> {
Err(ParserError::ExpectedByteString {
found: self.token.token().clone(),
span: self.token.to_span(),
})
}

fn expected_identifier<T>(&mut self) -> ParseResult<T> {
Err(ParserError::ExpectedIdentifier {
found: self.token.token().clone(),
Expand Down Expand Up @@ -911,6 +960,8 @@ pub(crate) enum ParserError {
ExpectedInstructionOrTerminator { found: Token, span: Span },
#[error("Expected a string literal or 'data', found '{found}'")]
ExpectedStringOrData { found: Token, span: Span },
#[error("Expected a byte string literal, found '{found}'")]
ExpectedByteString { found: Token, span: Span },
#[error("Expected a value, found '{found}'")]
ExpectedValue { found: Token, span: Span },
#[error("Multiple return values only allowed for call")]
Expand All @@ -928,6 +979,7 @@ impl ParserError {
| ParserError::ExpectedType { span, .. }
| ParserError::ExpectedInstructionOrTerminator { span, .. }
| ParserError::ExpectedStringOrData { span, .. }
| ParserError::ExpectedByteString { span, .. }
| ParserError::ExpectedValue { span, .. } => *span,
ParserError::MultipleReturnValuesOnlyAllowedForCall { second_target, .. } => {
second_target.span
Expand Down
30 changes: 27 additions & 3 deletions compiler/noirc_evaluator/src/ssa/parser/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,30 @@ fn test_make_composite_array() {
assert_ssa_roundtrip(src);
}

#[test]
fn test_make_byte_array_with_string_literal() {
let src = "
acir(inline) fn main f0 {
b0():
v9 = make_array b\"Hello world!\"
return v9
}
";
assert_ssa_roundtrip(src);
}

#[test]
fn test_make_byte_slice_with_string_literal() {
let src = "
acir(inline) fn main f0 {
b0():
v9 = make_array &b\"Hello world!\"
return v9
}
";
assert_ssa_roundtrip(src);
}

#[test]
fn test_block_parameters() {
let src = "
Expand Down Expand Up @@ -228,14 +252,14 @@ fn test_constrain_with_static_message() {

#[test]
fn test_constrain_with_dynamic_message() {
let src = "
let src = r#"
acir(inline) fn main f0 {
b0(v0: Field, v1: Field):
v7 = make_array [u8 123, u8 120, u8 125, u8 32, u8 123, u8 121, u8 125] : [u8; 7]
v7 = make_array b"{x} {y}"
constrain v0 == Field 1, data v7, u32 2, v0, v1
return
}
";
"#;
assert_ssa_roundtrip(src);
}

Expand Down
2 changes: 2 additions & 0 deletions compiler/noirc_evaluator/src/ssa/parser/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ pub(crate) enum Token {
Ident(String),
Int(FieldElement),
Str(String),
ByteStr(String),
Keyword(Keyword),
IntType(IntType),
/// =
Expand Down Expand Up @@ -79,6 +80,7 @@ impl Display for Token {
Token::Ident(ident) => write!(f, "{}", ident),
Token::Int(int) => write!(f, "{}", int),
Token::Str(string) => write!(f, "{string:?}"),
Token::ByteStr(string) => write!(f, "{string:?}"),
Token::Keyword(keyword) => write!(f, "{}", keyword),
Token::IntType(int_type) => write!(f, "{}", int_type),
Token::Assign => write!(f, "="),
Expand Down

0 comments on commit fc11b63

Please sign in to comment.