Skip to content

Commit

Permalink
Start building the new Piece internal representation and new style.
Browse files Browse the repository at this point in the history
This gets the foundation in place for the new IR, the new visitor that
produces it, the new line splitter that consumes it, the new style, and
new tests for it.

It also adds support for formatting library, import, and export
directives to make sure everything is wired up together and working.

Existing formatting behavior is unchanged. You have to opt in to the
new stuff by passing "tall-style" as an experiment flag.

This PR doesn't support comments, but it does leave some unused code
in a few places that will be used to handle comments in future PRs.

It has many UnimplementedError throws. Those will get filled in as more
of the language is implemented in the new style.

There are also two new kinds of TODO comments:

  TODO(tall): ...

These comments describe work that needs to be done before the new
style is fully working.

  TODO(perf): ...

These describe potential areas for optimization. Once more of the
language is supported with the new IR and I can run some larger
benchmark samples through it, I can start exploring where the actual
performance problems are. For now, I'm just leaving breadcrumbs.
  • Loading branch information
munificent committed Sep 12, 2023
1 parent 1a2def9 commit 1f7a188
Show file tree
Hide file tree
Showing 25 changed files with 2,333 additions and 28 deletions.
17 changes: 10 additions & 7 deletions example/format.dart
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,25 @@ void main(List<String> args) {
debug.traceLineWriter = true;
debug.traceSplitter = true;
debug.useAnsiColors = true;
debug.tracePieceBuilder = true;
debug.traceSolver = true;

formatStmt('a is int????;');
formatUnit("import 'a.dart';", tall: true);
}

void formatStmt(String source, [int pageWidth = 80]) {
runFormatter(source, pageWidth, isCompilationUnit: false);
void formatStmt(String source, {required bool tall, int pageWidth = 80}) {
runFormatter(source, pageWidth, tall: tall, isCompilationUnit: false);
}

void formatUnit(String source, [int pageWidth = 80]) {
runFormatter(source, pageWidth, isCompilationUnit: true);
void formatUnit(String source, {required bool tall, int pageWidth = 80}) {
runFormatter(source, pageWidth, tall: tall, isCompilationUnit: true);
}

void runFormatter(String source, int pageWidth,
{required bool isCompilationUnit}) {
{required bool tall, required bool isCompilationUnit}) {
try {
var formatter = DartFormatter(pageWidth: pageWidth);
var formatter = DartFormatter(
pageWidth: pageWidth, experimentFlags: [if (tall) 'tall-style']);

String result;
if (isCompilationUnit) {
Expand Down
198 changes: 198 additions & 0 deletions lib/src/back_end/code_writer.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import '../constants.dart';
import '../piece/piece.dart';
import 'solution.dart';

/// The interface that pieces use to produce their formatted output while
/// piece states are being solved.
class CodeWriter {
  /// The column limit used when measuring how far lines overflow.
  final int _pageWidth;

  /// The state selected for each piece being written.
  final PieceStateSet _pieceStates;

  /// Accumulates the formatted code.
  final StringBuffer _buffer = StringBuffer();

  /// The total cost of the line splits chosen so far.
  int _cost = 0;

  /// The total number of characters that extend past the page width across
  /// all lines finished so far.
  int _overflow = 0;

  /// The length of the line currently being written.
  int _column = 0;

  /// Whether a newline was written somewhere newlines are not allowed, which
  /// makes this solution invalid.
  bool _containsInvalidNewline = false;

  /// A stack holding the options of each piece currently being formatted by
  /// a call to [format], innermost last.
  ///
  /// The bottom entry is a root with no indentation that allows newlines.
  /// An entry is pushed when [format] recurses into a piece and popped when
  /// that piece completes, so indentation and nesting accumulate on the way
  /// down and unwind on the way back up.
  final List<_PieceOptions> _pieceOptions = [_PieceOptions(0, 0, true)];

  /// The options for the innermost piece currently being formatted.
  _PieceOptions get _options => _pieceOptions.last;

  /// The options for the piece (or root) that directly encloses the innermost
  /// piece currently being formatted.
  _PieceOptions get _parentOptions => _pieceOptions[_pieceOptions.length - 2];

  CodeWriter(this._pageWidth, this._pieceStates);

  /// Completes the last line and returns the formatted code along with the
  /// [Score] describing its validity, overflow, and cost.
  (String, Score) finish() {
    _finishLine();

    var score = Score(
        isValid: !_containsInvalidNewline, overflow: _overflow, cost: _cost);
    return (_buffer.toString(), score);
  }

  /// Records that a newline has been written.
  ///
  /// Marks the solution invalid if the current piece doesn't allow newlines.
  ///
  /// This is called externally by [TextPiece] to let the writer know some of
  /// the raw text contains a newline, which can happen in multi-line block
  /// comments and multi-line string literals.
  void handleNewline() {
    if (_options.allowNewlines) return;

    _containsInvalidNewline = true;
  }

  /// Appends [text] to the output and advances the current column by its
  /// length.
  void write(String text) {
    _column += text.length;
    _buffer.write(text);
  }

  /// Sets the block indentation for code written by the current piece to
  /// [indent] spaces beyond the indentation of the surrounding piece.
  ///
  /// Replaces any indentation previously set by this piece.
  void setIndent(int indent) {
    _options.indent = _parentOptions.indent + indent;
  }

  /// Adds one level of expression nesting to the current piece when
  /// [condition] is `true`; otherwise does nothing.
  void nestIf(bool condition) {
    if (condition) _options.nesting += Indent.expression;
  }

  /// Sets the expression nesting for code written by the current piece to
  /// [nesting] spaces beyond the nesting of the surrounding piece.
  ///
  /// Replaces any nesting previously set by this piece.
  void setNesting(int nesting) {
    _options.nesting = _parentOptions.nesting + nesting;
  }

  /// Writes a newline when [condition] is `true`.
  ///
  /// When [condition] is `false`, writes a single space instead if [space]
  /// is `true`, and nothing otherwise.
  ///
  /// If [indent] is given, it is applied with [setIndent] first, so it
  /// affects this newline and all later ones written by the current piece.
  void splitIf(bool condition, {bool space = true, int? indent}) {
    if (indent != null) setIndent(indent);

    if (condition) {
      newline();
      return;
    }

    if (space) this.space();
  }

  /// Writes a single space to the output.
  void space() => write(' ');

  /// Ends the current line and begins a new one indented by the current
  /// piece's combined indentation.
  ///
  /// If [blank] is `true`, an extra newline is written first so that a blank
  /// line separates the two lines.
  void newline({bool blank = false}) {
    handleNewline();
    _finishLine();

    _buffer.writeln();
    if (blank) _buffer.writeln();

    // The new line starts out containing only its indentation.
    var indentation = _options.combinedIndentation;
    _column = indentation;
    _buffer.write(' ' * indentation);
  }

  /// Sets whether newlines may occur from this point on in the current piece
  /// or any of its children.
  void setAllowNewlines(bool allowed) {
    _options.allowNewlines = allowed;
  }

  /// Formats [piece] using its selected state and inserts the result into
  /// the code.
  void format(Piece piece) {
    // Once the solution is known to be invalid it will be discarded, so there
    // is no point walking the rest of the piece tree.
    if (_containsInvalidNewline) return;

    // TODO(tall): Sometimes, we'll want to reset the expression nesting for
    // an inner piece, for when a block-like construct appears inside an
    // expression. If it turns out that we don't actually need to handle indent
    // and nesting separately here, then merge them into a single field.
    var enclosing = _options;
    _pieceOptions.add(_PieceOptions(
        enclosing.indent, enclosing.nesting, enclosing.allowNewlines));

    var selectedState = _pieceStates.pieceState(piece);

    // TODO(tall): Support pieces with different split costs, and possibly
    // different costs for each state value.
    if (selectedState != 0) _cost++;

    // TODO(perf): Memoize this. Might want to create a nested PieceWriter
    // instead of passing in `this` so we can better control what state needs
    // to be used as the key in the memoization table.
    piece.format(this, selectedState);

    _pieceOptions.removeLast();
  }

  /// Formats [piece] unless it is `null`.
  void formatOptional(Piece? piece) {
    if (piece == null) return;

    format(piece);
  }

  /// Adds to the running overflow however many characters of the line being
  /// completed extend past the page width.
  void _finishLine() {
    var excess = _column - _pageWidth;
    if (excess > 0) _overflow += excess;
  }
}

/// The mutable formatting state that belongs to one piece while it is being
/// formatted.
class _PieceOptions {
  _PieceOptions(this.indent, this.nesting, this.allowNewlines);

  /// Spaces of leading indentation that come from block-like structure or
  /// from explicit extra indentation (aligning constructor initializers,
  /// `show` clauses, etc.).
  int indent;

  /// Spaces of indentation that come from wrapped expressions.
  int nesting;

  /// Whether newlines may be written.
  ///
  /// Writing a newline while this is `false` makes the entire solution
  /// invalid, and it gets discarded.
  bool allowNewlines;

  /// The total indentation in spaces: [indent] plus [nesting].
  int get combinedIndentation {
    return indent + nesting;
  }
}
167 changes: 167 additions & 0 deletions lib/src/back_end/solution.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import '../piece/piece.dart';
import 'code_writer.dart';

/// The states selected so far for a set of pieces being solved.
///
/// The selection may be incomplete: some pieces may not have a state yet.
class PieceStateSet {
  // TODO(perf): Looking up and expanding the set of chunk states was a
  // performance bottleneck in the old line splitter. If that turns out to be
  // true here, then consider a faster representation for this list and the
  // subsequent map field.
  /// Every piece being solved, flattened into a single in-order list.
  ///
  /// Pieces like text that only have a single value are left out because
  /// there is nothing to solve for them.
  final List<Piece> _pieces;

  /// The state explicitly selected for each piece that has one.
  final Map<Piece, int> _pieceStates;

  /// Creates a [PieceStateSet] in which no piece has a selected state, which
  /// implicitly means every piece has state 0.
  PieceStateSet(this._pieces) : _pieceStates = {};

  PieceStateSet._(this._pieces, this._pieceStates);

  /// Returns the state selected for [piece], or 0 if none has been selected.
  int pieceState(Piece piece) {
    var selected = _pieceStates[piece];
    return selected ?? 0;
  }

  /// Returns the earliest piece with no selected state, or `null` when every
  /// piece has one.
  Piece? firstUnsolved() {
    // TODO(perf): This may be slow. Could store the index at construction time.
    for (var i = 0; i < _pieces.length; i++) {
      var candidate = _pieces[i];
      if (_pieceStates.containsKey(candidate)) continue;

      return candidate;
    }

    return null;
  }

  /// Returns a copy of this set in which [piece] is bound to [state].
  ///
  /// This set itself is not modified.
  PieceStateSet cloneWith(Piece piece, int state) {
    var states = {..._pieceStates};
    states[piece] = state;
    return PieceStateSet._(_pieces, states);
  }

  /// Lists each piece followed by `:` and its selected state, using `?` for
  /// pieces that have no state selected yet.
  @override
  String toString() {
    var labels = [
      for (var piece in _pieces) '$piece:${_pieceStates[piece] ?? '?'}'
    ];
    return labels.join(' ');
  }
}

/// One candidate way of splitting the lines.
///
/// Holds the state chosen for each piece together with the formatted code
/// those choices produce and the score of that code.
class Solution implements Comparable<Solution> {
  /// The piece states this solution was built from.
  final PieceStateSet _state;

  /// The formatted code.
  final String text;

  /// The score that results from the selected piece states.
  final Score score;

  /// Formats [root] at [pageWidth] using the piece states in [state] and
  /// captures the resulting text and score.
  factory Solution(Piece root, int pageWidth, PieceStateSet state) {
    var writer = CodeWriter(pageWidth, state)..format(root);
    var (text, score) = writer.finish();
    return Solution._(state, text, score);
  }

  Solution._(this._state, this.text, this.score);

  /// Picks the first piece in this solution that has no selected state and
  /// returns one new solution for each state that piece can have.
  ///
  /// Returns an empty list when every piece already has a state.
  List<Solution> expand(Piece root, int pageWidth) {
    var unsolved = _state.firstUnsolved();
    if (unsolved == null) return const [];

    return [
      for (var state = 0; state < unsolved.stateCount; state++)
        Solution(root, pageWidth, _state.cloneWith(unsolved, state))
    ];
  }

  /// Orders solutions so that better ones sort first.
  ///
  /// For performance, we want to stop checking solutions as soon as we find
  /// the best one. Best means the fewest overflow characters and the lowest
  /// cost, as determined by comparing [score]s.
  @override
  int compareTo(Solution other) {
    var byScore = score.compareTo(other.score);
    if (byScore != 0) return byScore;

    // Both solutions should be solving the same set of pieces.
    assert(_state._pieces.length == other._state._pieces.length);

    // With equal scores, the solution that picks a lower state at the first
    // piece where the two differ wins.
    // TODO(tall): This might not be needed once piece scoring is more
    // sophisticated.
    for (var piece in _state._pieces) {
      var byState =
          _state.pieceState(piece).compareTo(other._state.pieceState(piece));
      if (byState != 0) return byState;
    }

    return 0;
  }

  @override
  String toString() => '$score $_state';
}

/// How good a solution is: whether it is valid, how much it overflows, and
/// how much its line splits cost.
class Score implements Comparable<Score> {
  // TODO(tall): Should this actually be part of scoring? Do we want to use
  // validity to determine how we order solutions to explore?
  /// Whether the solution this score describes is valid.
  ///
  /// A solution is invalid when a hard newline appears in a context where
  /// splitting isn't allowed. An invalid solution is considered worse than
  /// any valid one.
  final bool isValid;

  /// How many characters do not fit inside the page width.
  final int overflow;

  /// The sum of the penalties for the chosen line splits.
  final int cost;

  Score({required this.isValid, required this.overflow, required this.cost});

  /// Orders scores so that lower (better) ones sort first.
  ///
  /// Any valid score sorts before any invalid one, and all invalid scores
  /// are equal to each other. Valid scores are ordered by [overflow] and
  /// then by [cost].
  @override
  int compareTo(Score other) {
    // When exactly one side is invalid, the invalid side sorts after.
    if (isValid != other.isValid) return isValid ? -1 : 1;

    // Validity matches here, so if this one is invalid then both are.
    if (!isValid) return 0;

    // Overflow always outweighs line splitting cost.
    var byOverflow = overflow.compareTo(other.overflow);
    if (byOverflow != 0) return byOverflow;

    return cost.compareTo(other.cost);
  }

  /// Shows the cost prefixed with `$`, followed by the overflow when it is
  /// greater than zero and an `(invalid)` marker when the score is invalid.
  @override
  String toString() {
    var buffer = StringBuffer('\$$cost');
    if (overflow > 0) buffer.write(' ($overflow over)');
    if (!isValid) buffer.write(' (invalid)');
    return buffer.toString();
  }
}
Loading

0 comments on commit 1f7a188

Please sign in to comment.