Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Modules] No transitive source location change #86912

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/SourceLocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class SourceLocation {
friend class ASTWriter;
friend class SourceManager;
friend struct llvm::FoldingSetTrait<SourceLocation, void>;
friend class SourceLocationEncoding;

public:
using UIntTy = uint32_t;
Expand Down
56 changes: 25 additions & 31 deletions clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Serialization/SourceLocationEncoding.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Bitstream/BitCodes.h"
#include <cassert>
Expand Down Expand Up @@ -167,45 +168,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;

/// Source range/offset of a preprocessed entity.
struct PPEntityOffset {
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Raw source location of beginning of range.
SourceLocation::UIntTy Begin;
RawLocEncoding Begin;

/// Raw source location of end of range.
SourceLocation::UIntTy End;
RawLocEncoding End;

/// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
uint32_t BitOffset;

PPEntityOffset(SourceRange R, uint32_t BitOffset)
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
BitOffset(BitOffset) {}

SourceLocation getBegin() const {
return SourceLocation::getFromRawEncoding(Begin);
}
PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
: Begin(Begin), End(End), BitOffset(BitOffset) {}

SourceLocation getEnd() const {
return SourceLocation::getFromRawEncoding(End);
}
RawLocEncoding getBegin() const { return Begin; }
RawLocEncoding getEnd() const { return End; }
};

/// Source range of a skipped preprocessor region
struct PPSkippedRange {
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Raw source location of beginning of range.
SourceLocation::UIntTy Begin;
RawLocEncoding Begin;
/// Raw source location of end of range.
SourceLocation::UIntTy End;
RawLocEncoding End;

PPSkippedRange(SourceRange R)
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) {
}
PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
: Begin(Begin), End(End) {}

SourceLocation getBegin() const {
return SourceLocation::getFromRawEncoding(Begin);
}
SourceLocation getEnd() const {
return SourceLocation::getFromRawEncoding(End);
}
RawLocEncoding getBegin() const { return Begin; }
RawLocEncoding getEnd() const { return End; }
};

/// Offset in the AST file. Use splitted 64-bit integer into low/high
Expand All @@ -231,26 +225,26 @@ struct UnderalignedInt64 {

/// Source location and bit offset of a declaration.
struct DeclOffset {
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Raw source location.
SourceLocation::UIntTy Loc = 0;
RawLocEncoding RawLoc = 0;

/// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
/// structure alignment 32-bit and avoid padding gap because undefined
/// value in the padding affects AST hash.
UnderalignedInt64 BitOffset;

DeclOffset() = default;
DeclOffset(SourceLocation Loc, uint64_t BitOffset,
uint64_t DeclTypesBlockStartOffset) {
setLocation(Loc);
DeclOffset(RawLocEncoding RawLoc, uint64_t BitOffset,
uint64_t DeclTypesBlockStartOffset)
: RawLoc(RawLoc) {
setBitOffset(BitOffset, DeclTypesBlockStartOffset);
}

void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
void setRawLoc(RawLocEncoding Loc) { RawLoc = Loc; }

SourceLocation getLocation() const {
return SourceLocation::getFromRawEncoding(Loc);
}
RawLocEncoding getRawLoc() const { return RawLoc; }

void setBitOffset(uint64_t Offset, const uint64_t DeclTypesBlockStartOffset) {
BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset);
Expand Down
48 changes: 31 additions & 17 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -1771,6 +1771,7 @@ class ASTReader

/// Retrieve the module manager.
ModuleManager &getModuleManager() { return ModuleMgr; }
const ModuleManager &getModuleManager() const { return ModuleMgr; }

/// Retrieve the preprocessor.
Preprocessor &getPreprocessor() const { return PP; }
Expand Down Expand Up @@ -2177,8 +2178,8 @@ class ASTReader

/// Retrieve the global submodule ID given a module and its local ID
/// number.
serialization::SubmoduleID
getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID);
serialization::SubmoduleID getGlobalSubmoduleID(ModuleFile &M,
unsigned LocalID) const;

/// Retrieve the submodule that corresponds to a global submodule ID.
///
Expand All @@ -2191,7 +2192,7 @@ class ASTReader

/// Retrieve the module file with a given local ID within the specified
/// ModuleFile.
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID);
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID) const;

/// Get an ID for the given module file.
unsigned getModuleFileID(ModuleFile *M);
Expand Down Expand Up @@ -2227,33 +2228,46 @@ class ASTReader
return Sema::AlignPackInfo::getFromRawEncoding(Raw);
}

using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Read a source location from raw form and return it in its
/// originating module file's source location space.
SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
LocSeq *Seq = nullptr) const {
std::pair<SourceLocation, unsigned>
ReadUntranslatedSourceLocation(RawLocEncoding Raw,
LocSeq *Seq = nullptr) const {
return SourceLocationEncoding::decode(Raw, Seq);
}

/// Read a source location from raw form.
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
SourceLocation::UIntTy Raw,
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
LocSeq *Seq = nullptr) const {
SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
return TranslateSourceLocation(ModuleFile, Loc);
if (!MF.ModuleOffsetMap.empty())
ReadModuleOffsetMap(MF);

auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
ModuleFile *OwningModuleFile =
ModuleFileIndex == 0 ? &MF : MF.DependentModules[ModuleFileIndex - 1];

assert(!SourceMgr.isLoadedSourceLocation(Loc) &&
"Run out source location space");

return TranslateSourceLocation(*OwningModuleFile, Loc);
}

/// Translate a source location from another module file's source
/// location space into ours.
SourceLocation TranslateSourceLocation(ModuleFile &ModuleFile,
SourceLocation Loc) const {
if (!ModuleFile.ModuleOffsetMap.empty())
ReadModuleOffsetMap(ModuleFile);
assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
ModuleFile.SLocRemap.end() &&
"Cannot find offset to remap.");
SourceLocation::IntTy Remap =
ModuleFile.SLocRemap.find(Loc.getOffset())->second;
return Loc.getLocWithOffset(Remap);
if (Loc.isInvalid())
return Loc;

// FIXME: TranslateSourceLocation is not re-enterable. It is problematic
// to call TranslateSourceLocation on a translated source location.
// We either need a method to know whether or not a source location is
// translated or refactor the code to make it clear that
// TranslateSourceLocation won't be called with translated source location.

return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the -2 here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a lot - 2 in Serialization and SourceManager. I just tried to follow that. Sadly I didn't find clear comments explaining this. By reading the codes, I think it is trying to skip 2 dummy SLocEntries (the invalid source location and invalid expansion location, see SourceManager::clearIDTables()) since they are not sensitive to the context.

}

/// Read a source location.
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,10 @@ class ASTWriter : public ASTDeserializationListener,
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
LocSeq *Seq = nullptr);

/// Return the raw encodings for source locations.
SourceLocationEncoding::RawLocEncoding
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);

/// Emit a source range.
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
LocSeq *Seq = nullptr);
Expand Down
14 changes: 9 additions & 5 deletions clang/include/clang/Serialization/ModuleFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,6 @@ class ModuleFile {
/// AST file.
const uint32_t *SLocEntryOffsets = nullptr;

/// Remapping table for source locations in this module.
ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
SLocRemap;

// === Identifiers ===

/// The number of identifiers in this AST file.
Expand Down Expand Up @@ -512,9 +508,17 @@ class ModuleFile {
/// List of modules which depend on this module
llvm::SetVector<ModuleFile *> ImportedBy;

/// List of modules which this module depends on
/// List of modules which this module directly imported
llvm::SetVector<ModuleFile *> Imports;

/// List of modules which this modules dependent on. Different
/// from `Imports`, this includes indirectly imported modules too.
/// The order of DependentModules is significant. It should keep
/// the same order with that module file manager when we write
/// the current module file. The value of the member will be initialized
/// in `ASTReader::ReadModuleOffsetMap`.
llvm::SmallVector<ModuleFile *, 16> DependentModules;

/// Determine whether this module was directly imported at
/// any point during translation.
bool isDirectlyImported() const { return DirectlyImported; }
Expand Down
91 changes: 65 additions & 26 deletions clang/include/clang/Serialization/SourceLocationEncoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,33 @@
//
//===----------------------------------------------------------------------===//
//
// Source locations are stored pervasively in the AST, making up a third of
// the size of typical serialized files. Storing them efficiently is important.
// We wish to encode the SourceLocation from other module file not dependent
// on the other module file. So that the source location changes from other
// module file may not affect the contents of the current module file. Then the
// users don't need to recompile the whole project due to a new line in a module
// unit in the root of the dependency graph.
//
// We use integers optimized by VBR-encoding, because:
// - when abbreviations cannot be used, VBR6 encoding is our only choice
// - in the worst case a SourceLocation can be ~any 32-bit number, but in
// practice they are highly predictable
// To achieve this, we need to encode the index of the module file into the
// encoding of the source location. The encoding of the source location may be:
//
// We encode the integer so that likely values encode as small numbers that
// turn into few VBR chunks:
// - the invalid sentinel location is a very common value: it encodes as 0
// - the "macro or not" bit is stored at the bottom of the integer
// (rather than at the top, as in memory), so macro locations can have
// small representations.
// - related locations (e.g. of a left and right paren pair) are usually
// similar, so when encoding a sequence of locations we store only
// differences between successive elements.
// |-----------------------|-----------------------|
// | A | B | C |
//
// * A: 32 bit. The index of the module file in the module manager + 1. The +1
// here is necessary since we wish 0 stands for the current module file.
// * B: 31 bit. The offset of the source location to the module file containing
// it.
// * C: The macro bit. We rotate it to the lowest bit so that we can save some
// space in case the index of the module file is 0.
//
// Specially, if the index of the module file is 0, we allow to encode a
// sequence of locations we store only differences between successive elements.
//
//===----------------------------------------------------------------------===//

#include <climits>
#include "clang/Basic/SourceLocation.h"
#include "llvm/Support/MathExtras.h"
#include <climits>

#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
Expand All @@ -52,9 +57,13 @@ class SourceLocationEncoding {
friend SourceLocationSequence;

public:
static uint64_t encode(SourceLocation Loc,
SourceLocationSequence * = nullptr);
static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
using RawLocEncoding = uint64_t;

static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
unsigned BaseModuleFileIndex,
SourceLocationSequence * = nullptr);
static std::pair<SourceLocation, unsigned>
decode(RawLocEncoding, SourceLocationSequence * = nullptr);
};

/// Serialized encoding of a sequence of SourceLocations.
Expand Down Expand Up @@ -149,14 +158,44 @@ class SourceLocationSequence::State {
operator SourceLocationSequence *() { return &Seq; }
};

inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
SourceLocationSequence *Seq) {
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
inline SourceLocationEncoding::RawLocEncoding
SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
unsigned BaseModuleFileIndex,
SourceLocationSequence *Seq) {
// If the source location is a local source location, we can try to optimize
// the similar sequences to only record the differences.
if (!BaseOffset)
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());

if (Loc.isInvalid())
return 0;

// Otherwise, the higher bits are used to store the module file index,
// so it is meaningless to optimize the source locations into small
// integers. Let's try to always use the raw encodings.
assert(Loc.getOffset() >= BaseOffset);
Loc = Loc.getLocWithOffset(-BaseOffset);
RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());

// 16 bits should be sufficient to store the module file index.
assert(BaseModuleFileIndex < (1 << 16));
Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were you going to change this to only reserve 16 bits for module index?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I assert it should be less than (1<<16)

return Encoded;
}
inline SourceLocation
SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
return Seq ? Seq->decode(Encoded)
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
inline std::pair<SourceLocation, unsigned>
SourceLocationEncoding::decode(RawLocEncoding Encoded,
SourceLocationSequence *Seq) {
unsigned ModuleFileIndex = Encoded >> 32;

if (!ModuleFileIndex)
return {Seq ? Seq->decode(Encoded)
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
ModuleFileIndex};

Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));

return {Loc, ModuleFileIndex};
}

} // namespace clang
Expand Down
2 changes: 0 additions & 2 deletions clang/lib/Frontend/ASTUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2374,8 +2374,6 @@ bool ASTUnit::serialize(raw_ostream &OS) {
return serializeUnit(Writer, Buffer, getSema(), OS);
}

using SLocRemap = ContinuousRangeMap<unsigned, int, 2>;

void ASTUnit::TranslateStoredDiagnostics(
FileManager &FileMgr,
SourceManager &SrcMgr,
Expand Down
Loading
Loading