Skip to content

Commit

Permalink
Introduce Indent datatype
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser committed Feb 9, 2024
1 parent 1ce07d6 commit 7893be7
Showing 1 changed file with 142 additions and 63 deletions.
205 changes: 142 additions & 63 deletions crates/ruff_python_formatter/src/string/docstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
// "reStructuredText."
#![allow(clippy::doc_markdown)]

use std::cmp::Ordering;
use std::{borrow::Cow, collections::VecDeque};

use itertools::Itertools;

use ruff_formatter::printer::SourceMapGeneration;
use ruff_python_parser::ParseError;

use {once_cell::sync::Lazy, regex::Regex};
use {
ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed},
Expand Down Expand Up @@ -180,7 +182,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
.clone()
// We don't want to count whitespace-only lines as misindented
.filter(|line| !line.trim().is_empty())
.map(indentation_length)
.map(|line| Indent::from_str(line).len())
.min()
.unwrap_or_default();

Expand Down Expand Up @@ -345,7 +347,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
};
// This looks suspicious, but it's consistent with the whitespace
// normalization that will occur anyway.
let indent = " ".repeat(min_indent);
let indent = " ".repeat(min_indent.len());
for docline in formatted_lines {
self.print_one(
&docline.map(|line| std::format!("{indent}{line}")),
Expand All @@ -355,7 +357,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
CodeExampleKind::Markdown(fenced) => {
// This looks suspicious, but it's consistent with the whitespace
// normalization that will occur anyway.
let indent = " ".repeat(fenced.opening_fence_indent);
let indent = " ".repeat(fenced.opening_fence_indent.len());
for docline in formatted_lines {
self.print_one(
&docline.map(|line| std::format!("{indent}{line}")),
Expand Down Expand Up @@ -400,7 +402,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
// overindented, in which case we strip the additional whitespace
// (see example in [`format_docstring`] doc comment). We then
// prepend the in-docstring indentation to the string.
let indent_len = indentation_length(trim_end) - self.stripped_indentation_length;
let indent_len = Indent::from_str(trim_end).len() - self.stripped_indentation_length;
let in_docstring_indent = " ".repeat(indent_len) + trim_end.trim_start();
text(&in_docstring_indent).fmt(self.f)?;
} else {
Expand Down Expand Up @@ -907,7 +909,7 @@ struct CodeExampleRst<'src> {
/// The content body of a block needs to be indented more than the line
/// opening the block, so we use this indentation to look for indentation
/// that is "more than" it.
opening_indent: usize,
opening_indent: Indent,

/// The minimum indent of the block, measured as an [`Indent`].
///
Expand All @@ -926,7 +928,7 @@ struct CodeExampleRst<'src> {
/// When the code snippet has been extracted, it is re-built before being
/// reformatted. The minimum indent is stripped from each line when it is
/// re-built.
min_indent: Option<usize>,
min_indent: Option<Indent>,

/// Whether this is a directive block or not. When not a directive, this is
/// a literal block. The main difference between them is that they start
Expand Down Expand Up @@ -975,7 +977,7 @@ impl<'src> CodeExampleRst<'src> {
}
Some(CodeExampleRst {
lines: vec![],
opening_indent: indentation_length(opening_indent),
opening_indent: Indent::from_str(opening_indent),
min_indent: None,
is_directive: false,
})
Expand Down Expand Up @@ -1013,7 +1015,7 @@ impl<'src> CodeExampleRst<'src> {
}
Some(CodeExampleRst {
lines: vec![],
opening_indent: indentation_length(original.line),
opening_indent: Indent::from_str(original.line),
min_indent: None,
is_directive: true,
})
Expand All @@ -1033,7 +1035,7 @@ impl<'src> CodeExampleRst<'src> {
line.code = if line.original.line.trim().is_empty() {
""
} else {
indentation_trim(min_indent, line.original.line)
min_indent.trim(line.original.line)
};
}
&self.lines
Expand Down Expand Up @@ -1070,7 +1072,7 @@ impl<'src> CodeExampleRst<'src> {
// an empty line followed by an unindented non-empty line.
if let Some(next) = original.next {
let (next_indent, next_rest) = indent_with_suffix(next);
if !next_rest.is_empty() && indentation_length(next_indent) <= self.opening_indent {
if !next_rest.is_empty() && Indent::from_str(next_indent) <= self.opening_indent {
self.push_format_action(queue);
return None;
}
Expand All @@ -1082,7 +1084,7 @@ impl<'src> CodeExampleRst<'src> {
queue.push_back(CodeExampleAddAction::Kept);
return Some(self);
}
let indent_len = indentation_length(indent);
let indent_len = Indent::from_str(indent);
if indent_len <= self.opening_indent {
// If we find an unindented non-empty line at the same (or less)
// indentation of the opening line at this point, then we know it
Expand Down Expand Up @@ -1144,7 +1146,7 @@ impl<'src> CodeExampleRst<'src> {
queue.push_back(CodeExampleAddAction::Print { original });
return Some(self);
}
let min_indent = indentation_length(indent);
let min_indent = Indent::from_str(indent);
// At this point, we found a non-empty line. The only thing we require
// is that its indentation is strictly greater than the indentation of
// the line containing the `::`. Otherwise, we treat this as an invalid
Expand Down Expand Up @@ -1223,7 +1225,7 @@ struct CodeExampleMarkdown<'src> {
///
/// This indentation is trimmed from the indentation of every line in the
/// body of the code block.
opening_fence_indent: usize,
opening_fence_indent: Indent,

/// The kind of fence, backticks or tildes, used for this block. We need to
/// keep track of which kind was used to open the block in order to look
Expand Down Expand Up @@ -1292,7 +1294,7 @@ impl<'src> CodeExampleMarkdown<'src> {
};
Some(CodeExampleMarkdown {
lines: vec![],
opening_fence_indent: indentation_length(opening_fence_indent),
opening_fence_indent: Indent::from_str(opening_fence_indent),
fence_kind,
fence_len,
})
Expand Down Expand Up @@ -1325,7 +1327,7 @@ impl<'src> CodeExampleMarkdown<'src> {
// its indent normalized. And, at the time of writing, a subsequent
// formatting run undoes this indentation, thus violating idempotency.
if !original.line.trim_whitespace().is_empty()
&& indentation_length(original.line) < self.opening_fence_indent
&& Indent::from_str(original.line) < self.opening_fence_indent
{
queue.push_back(self.into_reset_action());
queue.push_back(CodeExampleAddAction::Print { original });
Expand Down Expand Up @@ -1371,7 +1373,7 @@ impl<'src> CodeExampleMarkdown<'src> {
// Unlike reStructuredText blocks, for Markdown fenced code blocks, the
// indentation that we want to strip from each line is known when the
// block is opened. So we can strip it as we collect lines.
let code = indentation_trim(self.opening_fence_indent, original.line);
let code = self.opening_fence_indent.trim(original.line);
self.lines.push(CodeExampleLine { original, code });
}

Expand Down Expand Up @@ -1537,53 +1539,131 @@ fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
}

/// Returns the indentation's visual width in columns/spaces.
///
/// For docstring indentation, black counts spaces as 1 and tabs by increasing the indentation up
/// to the next multiple of 8. This is effectively a port of
/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs),
/// which black [calls with the default tab width of 8](https://github.com/psf/black/blob/c36e468794f9256d5e922c399240d49782ba04f1/src/black/strings.py#L61).
fn indentation_length(line: &str) -> usize {
let mut indentation = 0usize;
for char in line.chars() {
if char == '\t' {
// Pad to the next multiple of tab_width
indentation += 8 - (indentation.rem_euclid(8));
} else if char.is_whitespace() {
indentation += char.len_utf8();
/// Classification of the leading whitespace of a docstring line.
///
/// Each variant records enough information to compute the visual width of
/// the indentation via `Indent::len`, where a tab counts as 8 columns.
/// Equality and ordering between two `Indent`s are defined on that width,
/// not on the variant.
#[derive(Debug, Clone, Copy)]
enum Indent {
    /// Indentation made of spaces only; also used for "no indentation".
    ///
    /// Holds the number of spaces.
    Spaces(usize),

    /// Indentation made of tabs only; `count` is the number of tabs.
    Tabs { count: usize },

    /// "Smart tabs": `tabs` tabs for the indent level followed by `spaces`
    /// spaces for alignment.
    Align { tabs: usize, spaces: usize },

    /// Any other interleaving of tabs and spaces.
    ///
    /// Holds the already-computed visual width of the indentation.
    Mixed(usize),
}

impl Indent {
fn from_str(s: &str) -> Indent {
let mut iter = s.chars().peekable();

let spaces = iter.peeking_take_while(|c| *c == ' ').count();
let tabs = iter.peeking_take_while(|c| *c == '\t').count();

if tabs == 0 {
// No indent, or spaces only indent
return Indent::Spaces(spaces);
}

// Test if there are any spaces following the tabs
let spaces = iter.peeking_take_while(|c| *c == ' ').count();

if spaces == 0 {
return Indent::Tabs { count: tabs };
}

// At this point it's either a smart tab (tabs followed by spaces) or a wild mix of tabs and spaces.
if iter.peek().copied() == Some('\t') {
// Sequence of tabs, spaces, tabs...
let indent_width = 8;
let mut indentation = tabs * indent_width + spaces;

for char in iter {
if char == '\t' {
// Pad to the next multiple of tab_width
indentation += indent_width - (indentation.rem_euclid(indent_width));
} else if char.is_whitespace() {
indentation += char.len_utf8();
} else {
break;
}
}

// Mixed tabs and spaces
Indent::Mixed(indentation)
} else {
break;
Indent::Align { tabs, spaces }
}
}
indentation
}

/// Trims at most `indent_len` indentation from the beginning of `line`.
///
/// This treats indentation in precisely the same way as `indentation_length`.
/// As such, it is expected that `indent_len` is computed from
/// `indentation_length`. This is useful when one needs to trim some minimum
/// level of indentation from a code snippet collected from a docstring before
/// attempting to reformat it.
fn indentation_trim(indent_len: usize, line: &str) -> &str {
let mut seen_indent_len = 0;
let mut trimmed = line;
for char in line.chars() {
if seen_indent_len >= indent_len {
return trimmed;
/// Returns the indentation's visual width in columns/spaces.
///
/// For docstring indentation, black counts spaces as 1 and tabs by increasing the indentation up
/// to the next multiple of 8. This is effectively a port of
/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs),
/// which black [calls with the default tab width of 8](https://github.com/psf/black/blob/c36e468794f9256d5e922c399240d49782ba04f1/src/black/strings.py#L61).
const fn len(self) -> usize {
let indent_width = 8usize;
match self {
Indent::Spaces(count) => count,
Indent::Tabs { count } => count * indent_width,
Indent::Align { tabs, spaces } => tabs * indent_width + spaces,
Indent::Mixed(width) => width,
}
if char == '\t' {
// Pad to the next multiple of tab_width
seen_indent_len += 8 - (seen_indent_len.rem_euclid(8));
trimmed = &trimmed[1..];
} else if char.is_whitespace() {
seen_indent_len += char.len_utf8();
trimmed = &trimmed[char.len_utf8()..];
} else {
break;
}

/// Trims at most `indent_len` indentation from the beginning of `line`.
///
/// This treats indentation in precisely the same way as `indentation_length`.
/// As such, it is expected that `indent_len` is computed from
/// `indentation_length`. This is useful when one needs to trim some minimum
/// level of indentation from a code snippet collected from a docstring before
/// attempting to reformat it.
fn trim(self, line: &str) -> &str {
let indent_width = 8usize;

let mut seen_indent_len = 0;
let mut trimmed = line;
let indent_len = self.len();

for char in line.chars() {
if seen_indent_len >= indent_len {
return trimmed;
}
if char == '\t' {
// Pad to the next multiple of tab_width
seen_indent_len += indent_width - (seen_indent_len.rem_euclid(indent_width));
trimmed = &trimmed[1..];
} else if char.is_whitespace() {
seen_indent_len += char.len_utf8();
trimmed = &trimmed[char.len_utf8()..];
} else {
break;
}
}
trimmed
}
}

/// Orders indentations by their visual width (`Indent::len`) only; the
/// variant itself does not take part in the comparison.
impl PartialOrd for Indent {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let own_width = self.len();
        let other_width = other.len();
        // Widths are plain `usize`s, so this always yields `Some(_)`.
        own_width.partial_cmp(&other_width)
    }
}

/// Two indentations are equal when their visual widths (`Indent::len`)
/// match, even if they are different variants (for example one tab and
/// eight spaces).
impl PartialEq for Indent {
    fn eq(&self, other: &Self) -> bool {
        let own_width = self.len();
        own_width.eq(&other.len())
    }
}

impl Default for Indent {
fn default() -> Self {
Indent::Spaces(0)
}
trimmed
}

/// Returns the indentation of the given line and everything following it.
Expand Down Expand Up @@ -1613,14 +1693,13 @@ fn is_rst_option(line: &str) -> bool {

#[cfg(test)]
mod tests {

use super::indentation_length;
use crate::string::docstring::Indent;

#[test]
fn test_indentation_like_black() {
assert_eq!(indentation_length("\t \t \t"), 24);
assert_eq!(indentation_length("\t \t"), 24);
assert_eq!(indentation_length("\t\t\t"), 24);
assert_eq!(indentation_length(" "), 4);
assert_eq!(Indent::from_str("\t \t \t").len(), 24);
assert_eq!(Indent::from_str("\t \t").len(), 24);
assert_eq!(Indent::from_str("\t\t\t").len(), 24);
assert_eq!(Indent::from_str(" ").len(), 4);
}
}

0 comments on commit 7893be7

Please sign in to comment.