Skip to content

Commit

Permalink
[clang][Diagnostics] Highlight code snippets (#66514)
Browse files Browse the repository at this point in the history
Add some primitive syntax highlighting to our code snippet output.

This adds "checkpoints" to the Preprocessor, which we can use to start lexing from. When printing a code snippet, we lex from the nearest checkpoint and highlight the tokens based on their token type.
  • Loading branch information
tbaederr committed Jan 27, 2024
1 parent 863b2c8 commit 718aac9
Show file tree
Hide file tree
Showing 7 changed files with 262 additions and 14 deletions.
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ Attribute Changes in Clang

Improvements to Clang's diagnostics
-----------------------------------
- Clang now applies syntax highlighting to the code snippets it
prints.

Improvements to Clang's time-trace
----------------------------------
Expand Down
18 changes: 14 additions & 4 deletions clang/include/clang/Frontend/TextDiagnostic.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H

#include "clang/Frontend/DiagnosticRenderer.h"
#include "llvm/Support/raw_ostream.h"

namespace clang {

Expand All @@ -33,14 +34,22 @@ namespace clang {
/// printing coming out of libclang.
class TextDiagnostic : public DiagnosticRenderer {
raw_ostream &OS;
const Preprocessor *PP;

public:
TextDiagnostic(raw_ostream &OS,
const LangOptions &LangOpts,
DiagnosticOptions *DiagOpts);
TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
DiagnosticOptions *DiagOpts, const Preprocessor *PP = nullptr);

~TextDiagnostic() override;

struct StyleRange {
unsigned Start;
unsigned End;
enum llvm::raw_ostream::Colors Color;
StyleRange(unsigned S, unsigned E, enum llvm::raw_ostream::Colors C)
: Start(S), End(E), Color(C){};
};

/// Print the diagnostic level to a raw_ostream.
///
/// This is a static helper that handles colorizing the level and formatting
Expand Down Expand Up @@ -104,7 +113,8 @@ class TextDiagnostic : public DiagnosticRenderer {
ArrayRef<FixItHint> Hints);

void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth,
unsigned LineNo);
unsigned LineNo, unsigned DisplayLineNo,
ArrayRef<StyleRange> Styles);

void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
};
Expand Down
10 changes: 10 additions & 0 deletions clang/include/clang/Lex/Preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,13 @@ class Preprocessor {
/// The kind of translation unit we are processing.
const TranslationUnitKind TUKind;

/// Returns a pointer into the given file's buffer that's guaranteed
/// to be between tokens. The returned pointer is always before \p Start.
/// The maximum distance between the returned pointer and \p Start is
/// bounded by a constant whose value is an implementation detail.
/// If no such check point exists, \c nullptr is returned.
const char *getCheckPoint(FileID FID, const char *Start) const;

private:
/// The code-completion handler.
CodeCompletionHandler *CodeComplete = nullptr;
Expand Down Expand Up @@ -311,6 +318,9 @@ class Preprocessor {
/// The import path for named module that we're currently processing.
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;

llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
unsigned CheckPointCounter = 0;

/// Whether the import is an `@import` or a standard c++ modules import.
bool IsAtImport = false;

Expand Down
211 changes: 202 additions & 9 deletions clang/lib/Frontend/TextDiagnostic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"
Expand Down Expand Up @@ -41,6 +42,14 @@ static const enum raw_ostream::Colors fatalColor = raw_ostream::RED;
static const enum raw_ostream::Colors savedColor =
raw_ostream::SAVEDCOLOR;

// Colors used for syntax highlighting of code snippets.
//
// Magenta is taken for 'warning'. Red is already 'error' and 'cyan'
// is already taken for 'note'. Green is already used to underline
// source ranges. White and black are bad because of the usual
// terminal backgrounds. That leaves only two unused colors (yellow and
// blue); green is additionally reused below for literals.
static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;

/// Add highlights to differences in template strings.
static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
bool &Normal, bool Bold) {
Expand Down Expand Up @@ -644,10 +653,10 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
return Wrapped;
}

TextDiagnostic::TextDiagnostic(raw_ostream &OS,
const LangOptions &LangOpts,
DiagnosticOptions *DiagOpts)
: DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {}
/// Creates a renderer that writes diagnostics to \p OS.
///
/// \param OS       Stream the diagnostics are written to; stored by reference.
/// \param LangOpts Forwarded to the DiagnosticRenderer base class.
/// \param DiagOpts Forwarded to the DiagnosticRenderer base class.
/// \param PP       Stored as-is and later handed to the snippet highlighter;
///                 may be null, in which case no highlighting is produced.
TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
DiagnosticOptions *DiagOpts,
const Preprocessor *PP)
: DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {}

// Nothing to release explicitly; defined out of line.
TextDiagnostic::~TextDiagnostic() = default;

Expand Down Expand Up @@ -1112,6 +1121,162 @@ prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges,
return LineRanges;
}

/// Creates syntax highlighting information in the form of StyleRanges.
///
/// The returned array always has exactly
/// (\p EndLineNumber - \p StartLineNumber + 1) elements. Each SmallVector in
/// it holds the highlighting information for one line, with index 0
/// corresponding to \p StartLineNumber. In each line, the StyleRanges are
/// non-overlapping and sorted from start to end of the line.
///
/// \param FileData        Contents of the file identified by \p FID; it is
///                        lexed in raw mode.
/// \param StartLineNumber First line to produce styles for.
/// \param EndLineNumber   Last line (inclusive) to produce styles for.
/// \param PP              Used to find a lexing check point and to classify
///                        identifiers as keywords. If null, every line gets
///                        an empty range list.
/// \param LangOpts        Language options used for lexing and for the
///                        keyword check.
/// \param ShowColors      If false, every line gets an empty range list.
/// \param FID             The file being highlighted.
/// \param SM              Used to map token locations to lines and columns.
static std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]>
highlightLines(StringRef FileData, unsigned StartLineNumber,
               unsigned EndLineNumber, const Preprocessor *PP,
               const LangOptions &LangOpts, bool ShowColors, FileID FID,
               const SourceManager &SM) {
  assert(StartLineNumber <= EndLineNumber);
  auto SnippetRanges =
      std::make_unique<SmallVector<TextDiagnostic::StyleRange>[]>(
          EndLineNumber - StartLineNumber + 1);

  if (!PP || !ShowColors)
    return SnippetRanges;

  // Might cause emission of another diagnostic.
  if (PP->getIdentifierTable().getExternalIdentifierLookup())
    return SnippetRanges;

  auto Buff = llvm::MemoryBuffer::getMemBuffer(FileData);
  Lexer L{FID, *Buff, SM, LangOpts};
  L.SetKeepWhitespaceMode(true);

  // Start lexing from the closest check point before the first requested
  // line instead of from the beginning of the file, if there is one.
  const char *FirstLineStart =
      FileData.data() +
      SM.getDecomposedLoc(SM.translateLineCol(FID, StartLineNumber, 1)).second;
  if (const char *CheckPoint = PP->getCheckPoint(FID, FirstLineStart)) {
    assert(CheckPoint >= Buff->getBufferStart() &&
           CheckPoint <= Buff->getBufferEnd());
    assert(CheckPoint <= FirstLineStart);
    size_t Offset = CheckPoint - Buff->getBufferStart();
    L.seek(Offset, /*IsAtStartOfLine=*/false);
  }

  // Classify the given token and append it to the given vector.
  auto appendStyle =
      [PP, &LangOpts](SmallVector<TextDiagnostic::StyleRange> &Vec,
                      const Token &T, unsigned Start, unsigned Length) -> void {
    if (T.is(tok::raw_identifier)) {
      StringRef RawIdent = T.getRawIdentifier();
      // Special case true/false/nullptr/... literals, since they will otherwise
      // be treated as keywords.
      // FIXME: It would be good to have a programmatic way of getting this
      // list.
      if (llvm::StringSwitch<bool>(RawIdent)
              .Case("true", true)
              .Case("false", true)
              .Case("nullptr", true)
              .Case("__func__", true)
              .Case("__objc_yes__", true)
              .Case("__objc_no__", true)
              .Case("__null", true)
              .Case("__FUNCDNAME__", true)
              .Case("__FUNCSIG__", true)
              .Case("__FUNCTION__", true)
              .Default(false)) {
        Vec.emplace_back(Start, Start + Length, LiteralColor);
      } else {
        const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
        assert(II);
        // Identifiers that are not keywords are left unstyled.
        if (II->isKeyword(LangOpts))
          Vec.emplace_back(Start, Start + Length, KeywordColor);
      }
    } else if (tok::isLiteral(T.getKind())) {
      Vec.emplace_back(Start, Start + Length, LiteralColor);
    } else {
      assert(T.is(tok::comment));
      Vec.emplace_back(Start, Start + Length, CommentColor);
    }
  };

  bool Stop = false;
  while (!Stop) {
    Token T;
    Stop = L.LexFromRawLexer(T);
    if (T.is(tok::unknown))
      continue;

    // We are only interested in identifiers, literals and comments.
    if (!T.is(tok::raw_identifier) && !T.is(tok::comment) &&
        !tok::isLiteral(T.getKind()))
      continue;

    // Skip tokens that end before the first requested line.
    bool Invalid = false;
    unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid);
    if (Invalid || TokenEndLine < StartLineNumber)
      continue;

    assert(TokenEndLine >= StartLineNumber);

    unsigned TokenStartLine =
        SM.getSpellingLineNumber(T.getLocation(), &Invalid);
    if (Invalid)
      continue;
    // If this happens, we're done.
    if (TokenStartLine > EndLineNumber)
      break;

    unsigned StartCol =
        SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
    if (Invalid)
      continue;

    // Simple tokens.
    if (TokenStartLine == TokenEndLine) {
      SmallVector<TextDiagnostic::StyleRange> &LineRanges =
          SnippetRanges[TokenStartLine - StartLineNumber];
      appendStyle(LineRanges, T, StartCol, T.getLength());
      continue;
    }
    assert((TokenEndLine - TokenStartLine) >= 1);

    // For tokens that span multiple lines (think multiline comments), we
    // divide them into multiple StyleRanges.
    unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
    if (Invalid)
      continue;

    std::string Spelling = Lexer::getSpelling(T, SM, LangOpts);

    // NOTE(review): a "\r\n" line ending presumably counts as two vertical
    // whitespace characters here, which would advance CurLine twice per
    // source line -- confirm the behavior for CRLF input.
    unsigned CurLine = TokenStartLine;
    unsigned LineLength = 0;
    for (unsigned I = 0; I <= Spelling.size(); ++I) {
      // This line is done. Test the index first so the end-of-spelling case
      // never inspects a character.
      if (I == Spelling.size() || isVerticalWhitespace(Spelling[I])) {
        // The token may begin above the requested snippet. Those lines have
        // no slot in SnippetRanges, so only index the array once the line is
        // known to be in range (the unsigned subtraction would wrap
        // otherwise).
        if (CurLine >= StartLineNumber) {
          SmallVector<TextDiagnostic::StyleRange> &LineRanges =
              SnippetRanges[CurLine - StartLineNumber];

          if (CurLine == TokenStartLine) // First line
            appendStyle(LineRanges, T, StartCol, LineLength);
          else if (CurLine == TokenEndLine) // Last line
            appendStyle(LineRanges, T, 0, EndCol);
          else
            appendStyle(LineRanges, T, 0, LineLength);
        }

        ++CurLine;
        if (CurLine > EndLineNumber)
          break;
        LineLength = 0;
        continue;
      }
      ++LineLength;
    }
  }

  return SnippetRanges;
}

/// Emit a code snippet and caret line.
///
/// This routine emits a single line's code snippet and caret line.
Expand Down Expand Up @@ -1181,6 +1346,12 @@ void TextDiagnostic::emitSnippetAndCaret(
OS.indent(MaxLineNoDisplayWidth + 2) << "| ";
};

// Prepare source highlighting information for the lines we're about to
// emit, starting from the first line.
std::unique_ptr<SmallVector<StyleRange>[]> SourceStyles =
highlightLines(BufStart, Lines.first, Lines.second, PP, LangOpts,
DiagOpts->ShowColors, FID, SM);

SmallVector<LineRange> LineRanges =
prepareAndFilterRanges(Ranges, SM, Lines, FID, LangOpts);

Expand Down Expand Up @@ -1247,7 +1418,8 @@ void TextDiagnostic::emitSnippetAndCaret(
}

// Emit what we have computed.
emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo);
emitSnippet(SourceLine, MaxLineNoDisplayWidth, LineNo, DisplayLineNo,
SourceStyles[LineNo - Lines.first]);

if (!CaretLine.empty()) {
indentForLineNumbers();
Expand Down Expand Up @@ -1277,16 +1449,18 @@ void TextDiagnostic::emitSnippetAndCaret(

void TextDiagnostic::emitSnippet(StringRef SourceLine,
unsigned MaxLineNoDisplayWidth,
unsigned LineNo) {
unsigned LineNo, unsigned DisplayLineNo,
ArrayRef<StyleRange> Styles) {
// Emit line number.
if (MaxLineNoDisplayWidth > 0) {
unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo);
unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo);
OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1)
<< LineNo << " | ";
<< DisplayLineNo << " | ";
}

// Print the source line one character at a time.
bool PrintReversed = false;
std::optional<llvm::raw_ostream::Colors> CurrentColor;
size_t I = 0;
while (I < SourceLine.size()) {
auto [Str, WasPrintable] =
Expand All @@ -1298,10 +1472,29 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
PrintReversed = !PrintReversed;
if (PrintReversed)
OS.reverseColor();
else
else {
OS.resetColor();
CurrentColor = std::nullopt;
}
}

// Apply syntax highlighting information.
const auto *CharStyle = llvm::find_if(Styles, [I](const StyleRange &R) {
return (R.Start < I && R.End >= I);
});

if (CharStyle != Styles.end()) {
if (!CurrentColor ||
(CurrentColor && *CurrentColor != CharStyle->Color)) {
OS.changeColor(CharStyle->Color, false);
CurrentColor = CharStyle->Color;
}
} else if (CurrentColor) {
OS.resetColor();
CurrentColor = std::nullopt;
}
}

OS << Str;
}

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Frontend/TextDiagnosticPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() {
void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO,
const Preprocessor *PP) {
// Build the TextDiagnostic utility.
TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts));
TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts, PP));
}

void TextDiagnosticPrinter::EndSourceFile() {
Expand Down
Loading

0 comments on commit 718aac9

Please sign in to comment.