Skip to content

Commit

Permalink
Remove trailing whitespace on first line of multiline string. (#1236)
Browse files Browse the repository at this point in the history
Remove trailing whitespace on first line of multiline string.

Normalizes, but retains, backslashes on the first ignored line of a multiline string.
(There should probably be a fix to remove them.)
  • Loading branch information
lrhn authored Oct 19, 2023
1 parent 53dc7e1 commit a528dc5
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
* Add `tall-style` experiment flag to enable the in-progress unstable new
formatting style (#1253).
* Format extension types.
* Normalize ignored whitespace and "escaped whitespace" on first line
of multiline string literals. (#1235)

## 2.3.3

Expand Down
89 changes: 83 additions & 6 deletions lib/src/source_visitor.dart
Original file line number Diff line number Diff line change
Expand Up @@ -4158,6 +4158,8 @@ class SourceVisitor extends ThrowingAstVisitor {
_endBody(rightBracket, forceSplit: nodes.isNotEmpty);
}

/// Matches a single line terminator: CR+LF, a lone CR, or a lone LF.
static final _lineTerminatorRE = RegExp(r'\r\n?|\n');

/// Writes the string literal [string] to the output.
///
/// Splits multiline strings into separate chunks so that the line splitter
Expand All @@ -4167,21 +4169,96 @@ class SourceVisitor extends ThrowingAstVisitor {
// comments are written first.
writePrecedingCommentsAndNewlines(string);

// Split each line of a multiline string into separate chunks.
var lines = string.lexeme.split(_formatter.lineEnding!);
var lines = string.lexeme.split(_lineTerminatorRE);
var offset = string.offset;
var firstLine = lines.first;
if (lines.length > 1) {
// Special case for multiline string which contains
// at least one newline.
_writeStringFirstLine(firstLine, string, offset: offset);
} else {
_writeText(firstLine, string, offset: offset);
}
offset += firstLine.length;

_writeText(lines.first, string, offset: offset);
offset += lines.first.length;

for (var line in lines.skip(1)) {
for (var i = 1; i < lines.length; i++) {
var line = lines[i];
builder.writeNewline(flushLeft: true, nest: true);
offset++;
_writeText(line, string, offset: offset, mergeEmptySplits: false);
offset += line.length;
}
}

/// Writes [line], the first source line of a string literal that spans
/// several lines.
///
/// When a multiline string literal has nothing but whitespace and
/// "escaped whitespace" between its opening quotes and the first line
/// break, that text (and the line break itself) contributes no code points
/// to the string value. This method normalizes such an ignored first line;
/// any other first line is written unchanged.
///
/// The normalization applies when all of the following hold:
///
/// * [line] starts with `'''` or `"""`, optionally preceded by `r`.
/// * At least one character follows the opening quotes.
/// * Every character after the opening quotes is a space, a tab, or a
///   backslash, and every backslash is either directly followed by a space
///   or tab or is the last character of [line]. In particular, two
///   adjacent backslashes disqualify the line.
///
/// In that case the output is the opening quotes (with any `r` prefix)
/// followed by one backslash per backslash in the original, separated by
/// single spaces. All unescaped whitespace is dropped.
///
/// NOTE(review): the check only looks at the text of [line]. It presumably
/// relies on [line] always being the start of the literal; confirm that a
/// continuation token of an interpolated string can never reach this
/// method with text that itself begins with triple quotes.
void _writeStringFirstLine(String line, Token string, {required int offset}) {
  const backslash = 0x5c;
  const space = 0x20;
  const tab = 0x09;

  // Skip the raw-string marker, if any, then the three quote characters.
  var prefixLength = line.startsWith('r') ? 1 : 0;
  var contentStart = prefixLength + 3;

  var hasIgnorableCandidate = line.length > contentStart &&
      (line.startsWith("'''", prefixLength) ||
          line.startsWith('"""', prefixLength));
  if (!hasIgnorableCandidate) {
    _writeText(line, string, offset: offset);
    return;
  }

  // Scan the rest of the line, counting backslashes. The scan stops early
  // at the first character that is not whitespace or a backslash that
  // "escapes" whitespace.
  var escapeCount = 0;
  var index = contentStart;
  while (index < line.length) {
    var unit = line.codeUnitAt(index);
    if (unit == backslash) {
      escapeCount++;
      index++;
      // A backslash directly before the line break is allowed.
      if (index >= line.length) break;
      unit = line.codeUnitAt(index);
    }
    if (unit != space && unit != tab) break;
    index++;
  }

  if (index != line.length) {
    // Something on the line is real string content; leave it untouched.
    _writeText(line, string, offset: offset);
    return;
  }

  // The whole remainder is ignored by the language. Keep the backslashes,
  // separated by single spaces, and drop everything else.
  var normalized = line.substring(0, contentStart) +
      List.filled(escapeCount, r'\').join(' ');
  _writeText(normalized, string, offset: offset);
}

/// Write the comma token following [node], if there is one.
void _writeCommaAfter(AstNode node) {
token(node.commaAfter);
Expand Down
117 changes: 117 additions & 0 deletions test/whitespace/multiline_string_first_list.stmt
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
40 columns |
>>> trailing all-space multiline string (×20 = space, ×09 = tab)
var x = '''×20×09×20×09
×20×09
×09×20''';
<<<
var x = '''
×20×09
×09×20''';
>>> single trailing space
var x = '''×20
z''';
<<<
var x = '''
z''';
>>> single trailing tab
var x = '''×09
z''';
<<<
var x = '''
z''';
>>> trailing all-space multiline raw string
var x = r'''×20×09×20×09
×20×09
×09×20''';
<<<
var x = r'''
×20×09
×09×20''';
>>> no trailing characters, nothing happens
var x = '''
×20×09''';
<<<
var x = '''
×20×09''';
>>> no line break, nothing happens
var x = '''×20×09''';
<<<
var x = '''×20×09''';
>>> line break not part of string.
var x = '''×20×09×20×09${
''}×20×09''';
<<<
var x = '''×20×09×20×09${''}×20×09''';
>>> "escapes" allowed, not removed, but normalized
var x = '''×20×09\×20\×09×20×09
''';
<<<
var x = '''\×20\
''';
>>> single escaped space
var x = '''\×20
z''';
<<<
var x = '''\
z''';
>>> single escaped tab
var x = '''\×09
z''';
<<<
var x = '''\
z''';
>>> single trailing escape
var x = '''\
z''';
<<<
var x = '''\
z''';
>>> final "escape" allowed too, not removed, but normalized
var x = '''×20×09\×20\×09×20×09\
''';
<<<
var x = '''\×20\×20\
''';
>>> "escape" allowed in raw strings, not removed, but normalized
var x = r'''×20×09\×20\×09×20×09\
''';
<<<
var x = r'''\×20\×20\
''';
>>> A "double-escape" is not an escaped whitespace
var x = '''×20×09\\×20
''';
<<<
var x = '''×20×09\\×20
''';
>>> Non-whitespace character zero-content part on first line
var x = ''' ${''}×20×09
''';
<<<
var x = ''' ${''}×20×09
''';
>>> interpolations do not start a new "first line"
var x = '''×20×09
${''}×20×09
''';
<<<
var x = '''
${''}×20×09
''';
>>> Works with any line break - U+000A
var x = '''×20×0a×20''';
<<<
var x = '''
×20''';
>>> Works with any line break - U+000D
var x = '''×20×0d×20''';
<<<
var x = '''
×20''';
>>> Works with any line break - U+000D U+000A
// First linebreak is not \r\n.
var x = '''×20×0d×0az×20''';
<<<
// First linebreak is not \r\n.
var x = '''
z×20''';

0 comments on commit a528dc5

Please sign in to comment.