diff --git a/CHANGELOG.md b/CHANGELOG.md index 503cb171..2de8731c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ * Add `tall-style` experiment flag to enable the in-progress unstable new formatting style (#1253). * Format extension types. +* Normalize ignored whitespace and "escaped whitespace" on first line + of multiline string literals. (#1235) ## 2.3.3 diff --git a/lib/src/source_visitor.dart b/lib/src/source_visitor.dart index c9381976..383a8fa7 100644 --- a/lib/src/source_visitor.dart +++ b/lib/src/source_visitor.dart @@ -4158,6 +4158,8 @@ class SourceVisitor extends ThrowingAstVisitor { _endBody(rightBracket, forceSplit: nodes.isNotEmpty); } + static final _lineTerminatorRE = RegExp(r'\r\n?|\n'); + /// Writes the string literal [string] to the output. /// /// Splits multiline strings into separate chunks so that the line splitter @@ -4167,14 +4169,20 @@ class SourceVisitor extends ThrowingAstVisitor { // comments are written first. writePrecedingCommentsAndNewlines(string); - // Split each line of a multiline string into separate chunks. - var lines = string.lexeme.split(_formatter.lineEnding!); + var lines = string.lexeme.split(_lineTerminatorRE); var offset = string.offset; + var firstLine = lines.first; + if (lines.length > 1) { + // Special case for multiline string which contains + // at least one newline. + _writeStringFirstLine(firstLine, string, offset: offset); + } else { + _writeText(firstLine, string, offset: offset); + } + offset += firstLine.length; - _writeText(lines.first, string, offset: offset); - offset += lines.first.length; - - for (var line in lines.skip(1)) { + for (var i = 1; i < lines.length; i++) { + var line = lines[i]; builder.writeNewline(flushLeft: true, nest: true); offset++; _writeText(line, string, offset: offset, mergeEmptySplits: false); @@ -4182,6 +4190,75 @@ class SourceVisitor extends ThrowingAstVisitor { } } + /// Writes the first line of a multi-line string. 
+ /// + /// If the string is a multiline string, and it has only whitespace + /// and escaped whitespace before a first line break, + /// omit the non-escaped trailing whitespace. + /// Normalize escaped non-final whitespace to spaces. + /// + /// More specifically: + /// If a multiline string literal contains at least one line-break + /// (a CR, LF or CR+LF) as part of the source character content + /// (characters inside interpolation expressions do not count), + /// and the source characters from the starting quote to the first + /// line-break contain only the characters space, tab and backslash, + /// with no two adjacent backslashes, then that part of the string source, + /// including the following line break, is excluded from contributing + /// code points to the string value. + /// + /// This function normalizes such excluded character sequences + /// to just the back-slashes, separated by space characters. + void _writeStringFirstLine(String line, Token string, {required int offset}) { + // Detect leading whitespace on the first line of multiline strings. + var quoteStart = line.startsWith('r') ? 1 : 0; + var quoteEnd = quoteStart + 3; + var backslashCount = 0; + if (line.length > quoteEnd && + (line.startsWith("'''", quoteStart) || + line.startsWith('"""', quoteStart))) { + // Start of a multiline string literal. + // Check if rest of the line is whitespace, possibly preceded by + // backslash, or has a single trailing backslash preceding the newline. + // Count the backslashes. + var cursor = quoteEnd; + const backslash = 0x5c; + const space = 0x20; + const tab = 0x09; + + do { + var char = line.codeUnitAt(cursor); + if (char == backslash) { + cursor += 1; + backslashCount++; + if (cursor >= line.length) { + break; + } + char = line.codeUnitAt(cursor); + } + if (char != space && char != tab) break; + cursor++; + } while (cursor < line.length); + if (cursor == line.length) { + // No invalid character sequence found before end of line. 
+ // Normalize the ignored "escaped" whitespace which has no + // effect on string content. + var firstLineText = line.substring(0, quoteEnd); + if (backslashCount > 0) { + var buffer = StringBuffer(firstLineText); + buffer.write(r'\'); + while (--backslashCount > 0) { + buffer.write(r' \'); + } + firstLineText = buffer.toString(); + } + _writeText(firstLineText, string, offset: offset); + return; + } + } + _writeText(line, string, offset: offset); + } + /// Write the comma token following [node], if there is one. void _writeCommaAfter(AstNode node) { token(node.commaAfter); diff --git a/test/whitespace/multiline_string_first_list.stmt b/test/whitespace/multiline_string_first_list.stmt new file mode 100644 index 00000000..03b3ca61 --- /dev/null +++ b/test/whitespace/multiline_string_first_list.stmt @@ -0,0 +1,117 @@ +40 columns | +>>> trailing all-space multiline string (×20 = space, ×09 = tab) +var x = '''×20×09×20×09 +×20×09 +×09×20'''; +<<< +var x = ''' +×20×09 +×09×20'''; +>>> single trailing space +var x = '''×20 +z'''; +<<< +var x = ''' +z'''; +>>> single trailing tab +var x = '''×09 +z'''; +<<< +var x = ''' +z'''; +>>> trailing all-space multiline raw string +var x = r'''×20×09×20×09 +×20×09 +×09×20'''; +<<< +var x = r''' +×20×09 +×09×20'''; +>>> no trailing characters, nothing happens +var x = ''' +×20×09'''; +<<< +var x = ''' +×20×09'''; +>>> no line break, nothing happens +var x = '''×20×09'''; +<<< +var x = '''×20×09'''; +>>> line break not part of string. 
+var x = '''×20×09×20×09${ +''}×20×09'''; +<<< +var x = '''×20×09×20×09${''}×20×09'''; +>>> "escapes" allowed, not removed, but normalized +var x = '''×20×09\×20\×09×20×09 +'''; +<<< +var x = '''\×20\ +'''; +>>> single escaped space +var x = '''\×20 +z'''; +<<< +var x = '''\ +z'''; +>>> single escaped tab +var x = '''\×09 +z'''; +<<< +var x = '''\ +z'''; +>>> single trailing escape +var x = '''\ +z'''; +<<< +var x = '''\ +z'''; +>>> final "escape" allowed too, not removed, but normalized +var x = '''×20×09\×20\×09×20×09\ +'''; +<<< +var x = '''\×20\×20\ +'''; +>>> "escape" allowed in raw strings, not removed, but normalized +var x = r'''×20×09\×20\×09×20×09\ +'''; +<<< +var x = r'''\×20\×20\ +'''; +>>> A "double-escape" is not an escaped whitespace +var x = '''×20×09\\×20 +'''; +<<< +var x = '''×20×09\\×20 +'''; +>>> Non-whitespace character zero-content part on first line +var x = ''' ${''}×20×09 +'''; +<<< +var x = ''' ${''}×20×09 +'''; +>>> interpolations do not start a new "first line" +var x = '''×20×09 +${''}×20×09 +'''; +<<< +var x = ''' +${''}×20×09 +'''; +>>> Works with any line break - U+000A +var x = '''×20×0a×20'''; +<<< +var x = ''' +×20'''; +>>> Works with any line break - U+000D +var x = '''×20×0d×20'''; +<<< +var x = ''' +×20'''; +>>> Works with any line break - U+000D U+000A +// First linebreak is not \r\n. +var x = '''×20×0d×0az×20'''; +<<< +// First linebreak is not \r\n. +var x = ''' +z×20''';