Skip to content

Commit

Permalink
[Modules] No transitive source location change
Browse files Browse the repository at this point in the history
  • Loading branch information
ChuanqiXu9 committed Mar 28, 2024
1 parent c6a65e4 commit 8d4e349
Show file tree
Hide file tree
Showing 15 changed files with 264 additions and 233 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/SourceLocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class SourceLocation {
friend class ASTWriter;
friend class SourceManager;
friend struct llvm::FoldingSetTrait<SourceLocation, void>;
friend class SourceLocationEncoding;

public:
using UIntTy = uint32_t;
Expand Down
56 changes: 25 additions & 31 deletions clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Serialization/SourceLocationEncoding.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Bitstream/BitCodes.h"
#include <cassert>
Expand Down Expand Up @@ -175,45 +176,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;

/// Source range/offset of a preprocessed entity.
struct PPEntityOffset {
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Raw source location of beginning of range.
SourceLocation::UIntTy Begin;
RawLocEncoding Begin;

/// Raw source location of end of range.
SourceLocation::UIntTy End;
RawLocEncoding End;

/// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
uint32_t BitOffset;

PPEntityOffset(SourceRange R, uint32_t BitOffset)
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
BitOffset(BitOffset) {}

SourceLocation getBegin() const {
return SourceLocation::getFromRawEncoding(Begin);
}
PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
: Begin(Begin), End(End), BitOffset(BitOffset) {}

SourceLocation getEnd() const {
return SourceLocation::getFromRawEncoding(End);
}
RawLocEncoding getBegin() const { return Begin; }
RawLocEncoding getEnd() const { return End; }
};

/// Source range of a skipped preprocessor region
struct PPSkippedRange {
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Raw source location of beginning of range.
SourceLocation::UIntTy Begin;
RawLocEncoding Begin;
/// Raw source location of end of range.
SourceLocation::UIntTy End;
RawLocEncoding End;

PPSkippedRange(SourceRange R)
: Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) {
}
PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
: Begin(Begin), End(End) {}

SourceLocation getBegin() const {
return SourceLocation::getFromRawEncoding(Begin);
}
SourceLocation getEnd() const {
return SourceLocation::getFromRawEncoding(End);
}
RawLocEncoding getBegin() const { return Begin; }
RawLocEncoding getEnd() const { return End; }
};

/// Offset in the AST file. Use splitted 64-bit integer into low/high
Expand All @@ -239,26 +233,26 @@ struct UnderalignedInt64 {

/// Source location and bit offset of a declaration.
struct DeclOffset {
using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Raw source location.
SourceLocation::UIntTy Loc = 0;
RawLocEncoding RawLoc = 0;

/// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
/// structure alignment 32-bit and avoid padding gap because undefined
/// value in the padding affects AST hash.
UnderalignedInt64 BitOffset;

DeclOffset() = default;
DeclOffset(SourceLocation Loc, uint64_t BitOffset,
uint64_t DeclTypesBlockStartOffset) {
setLocation(Loc);
DeclOffset(RawLocEncoding RawLoc, uint64_t BitOffset,
uint64_t DeclTypesBlockStartOffset)
: RawLoc(RawLoc) {
setBitOffset(BitOffset, DeclTypesBlockStartOffset);
}

void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
void setRawLoc(RawLocEncoding Loc) { RawLoc = Loc; }

SourceLocation getLocation() const {
return SourceLocation::getFromRawEncoding(Loc);
}
RawLocEncoding getRawLoc() const { return RawLoc; }

void setBitOffset(uint64_t Offset, const uint64_t DeclTypesBlockStartOffset) {
BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset);
Expand Down
54 changes: 34 additions & 20 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ class ASTReader
/// Mapping from global submodule IDs to the module file in which the
/// submodule resides along with the offset that should be added to the
/// global submodule ID to produce a local ID.
GlobalSubmoduleMapType GlobalSubmoduleMap;
mutable GlobalSubmoduleMapType GlobalSubmoduleMap;

/// A set of hidden declarations.
using HiddenNames = SmallVector<Decl *, 2>;
Expand Down Expand Up @@ -942,6 +942,12 @@ class ASTReader
/// Sema tracks these to emit deferred diags.
llvm::SmallSetVector<serialization::DeclID, 4> DeclsToCheckForDeferredDiags;

/// The module files imported by different module files. Indirectly imported
/// module files are included too. The information comes from
/// ReadModuleOffsetMap(ModuleFile&).
mutable llvm::DenseMap<ModuleFile *, llvm::SmallVector<ModuleFile *>>
ImportedModuleFiles;

private:
struct ImportedSubmodule {
serialization::SubmoduleID ID;
Expand Down Expand Up @@ -1761,6 +1767,7 @@ class ASTReader

/// Retrieve the module manager.
ModuleManager &getModuleManager() { return ModuleMgr; }
const ModuleManager &getModuleManager() const { return ModuleMgr; }

/// Retrieve the preprocessor.
Preprocessor &getPreprocessor() const { return PP; }
Expand Down Expand Up @@ -2170,8 +2177,8 @@ class ASTReader

/// Retrieve the global submodule ID given a module and its local ID
/// number.
serialization::SubmoduleID
getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID);
serialization::SubmoduleID getGlobalSubmoduleID(ModuleFile &M,
unsigned LocalID) const;

/// Retrieve the submodule that corresponds to a global submodule ID.
///
Expand All @@ -2184,7 +2191,7 @@ class ASTReader

/// Retrieve the module file with a given local ID within the specified
/// ModuleFile.
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID);
ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID) const;

/// Get an ID for the given module file.
unsigned getModuleFileID(ModuleFile *M);
Expand Down Expand Up @@ -2220,40 +2227,47 @@ class ASTReader
return Sema::AlignPackInfo::getFromRawEncoding(Raw);
}

using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;

/// Read a source location from raw form and return it in its
/// originating module file's source location space.
SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
LocSeq *Seq = nullptr) const {
std::pair<SourceLocation, unsigned>
ReadUntranslatedSourceLocation(RawLocEncoding Raw,
LocSeq *Seq = nullptr) const {
return SourceLocationEncoding::decode(Raw, Seq);
}

/// Read a source location from raw form.
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
SourceLocation::UIntTy Raw,
LocSeq *Seq = nullptr) const {
SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
return TranslateSourceLocation(ModuleFile, Loc);
/// Read a source location from its raw encoding.
///
/// The raw value is decoded into a location plus a module file index. An
/// index of 0 selects \p MF itself; a non-zero index N selects entry N - 1 of
/// ImportedModuleFiles[&MF]. The decoded location is then translated with
/// respect to the selected module file.
///
/// \param MF  The module file whose record contains \p Raw.
/// \param Raw The raw encoding to decode.
/// \param Seq Forwarded unchanged to ReadUntranslatedSourceLocation.
/// \returns The result of TranslateSourceLocation for the selected file.
SourceLocation ReadRawSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
LocSeq *Seq = nullptr) const {
// If MF still has a non-empty ModuleOffsetMap, read it before the lookup
// below; ImportedModuleFiles is documented as being filled in by
// ReadModuleOffsetMap.
if (!MF.ModuleOffsetMap.empty())
ReadModuleOffsetMap(MF);

auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
// Index 0 means the location belongs to MF; any other index is 1-based.
// NOTE(review): the index is not bounds-checked here - confirm the writer
// guarantees it stays within ImportedModuleFiles[&MF].
ModuleFile *ModuleFileHomingLoc =
ModuleFileIndex ? ImportedModuleFiles[&MF][ModuleFileIndex - 1] : &MF;
return TranslateSourceLocation(*ModuleFileHomingLoc, Loc);
}

/// Translate a source location from another module file's source
/// location space into ours.
SourceLocation TranslateSourceLocation(ModuleFile &ModuleFile,
SourceLocation Loc) const {
if (!ModuleFile.ModuleOffsetMap.empty())
ReadModuleOffsetMap(ModuleFile);
assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
ModuleFile.SLocRemap.end() &&
"Cannot find offset to remap.");
SourceLocation::IntTy Remap =
ModuleFile.SLocRemap.find(Loc.getOffset())->second;
return Loc.getLocWithOffset(Remap);
if (Loc.isInvalid())
return Loc;

// It implies that the Loc is already translated.
if (SourceMgr.isLoadedSourceLocation(Loc))
return Loc;

return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2);
}

/// Read a source location.
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
const RecordDataImpl &Record, unsigned &Idx,
LocSeq *Seq = nullptr) {
return ReadSourceLocation(ModuleFile, Record[Idx++], Seq);
return ReadRawSourceLocation(ModuleFile, Record[Idx++], Seq);
}

/// Read a FileID.
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,10 @@ class ASTWriter : public ASTDeserializationListener,
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
LocSeq *Seq = nullptr);

/// Return the raw encodings for source locations.
SourceLocationEncoding::RawLocEncoding
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);

/// Emit a source range.
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
LocSeq *Seq = nullptr);
Expand Down
4 changes: 0 additions & 4 deletions clang/include/clang/Serialization/ModuleFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,6 @@ class ModuleFile {
/// AST file.
const uint32_t *SLocEntryOffsets = nullptr;

/// Remapping table for source locations in this module.
ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
SLocRemap;

// === Identifiers ===

/// The number of identifiers in this AST file.
Expand Down
78 changes: 53 additions & 25 deletions clang/include/clang/Serialization/SourceLocationEncoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,26 @@
//
//===----------------------------------------------------------------------===//
//
// Source locations are stored pervasively in the AST, making up a third of
// the size of typical serialized files. Storing them efficiently is important.
// We wish to encode a SourceLocation that comes from another module file in a
// way that does not depend on that module file, so that source location
// changes in the other module file do not affect the contents of the current
// module file. Users then don't need to recompile the whole project just
// because a new line was added to a module unit at the root of the dependency
// graph.
//
// We use integers optimized by VBR-encoding, because:
// - when abbreviations cannot be used, VBR6 encoding is our only choice
// - in the worst case a SourceLocation can be ~any 32-bit number, but in
// practice they are highly predictable
// To achieve this, we need to encode the index of the module file into the
// encoding of the source location. The encoding of the source location may be:
//
// We encode the integer so that likely values encode as small numbers that
// turn into few VBR chunks:
// - the invalid sentinel location is a very common value: it encodes as 0
// - the "macro or not" bit is stored at the bottom of the integer
// (rather than at the top, as in memory), so macro locations can have
// small representations.
// - related locations (e.g. of a left and right paren pair) are usually
// similar, so when encoding a sequence of locations we store only
// differences between successive elements.
// |-----------------------|-----------------------|
// | A | B | C |
//
// * A: 32 bits. The index of the module file in the module manager + 1. The +1
//      is necessary because 0 stands for the current module file.
// * B: 31 bits. The offset of the source location relative to the module file
//      containing it.
// * C: 1 bit. The macro bit. It is rotated to the lowest bit so that we can
//      save some space in case the index of the module file is 0.
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -52,11 +55,20 @@ class SourceLocationEncoding {
friend SourceLocationSequence;

public:
static uint64_t encode(SourceLocation Loc,
SourceLocationSequence * = nullptr);
static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
using RawLocEncoding = uint64_t;

static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
unsigned BaseModuleFileIndex,
SourceLocationSequence * = nullptr);
static std::pair<SourceLocation, unsigned>
decode(RawLocEncoding, SourceLocationSequence * = nullptr);
};

/// TODO: Remove SourceLocationSequence since it is no longer used.
/// Because the index of the ModuleFile is stored in the high bits of the
/// encoding of a source location, trying to reduce the size of source
/// locations is no longer meaningful.
///
/// Serialized encoding of a sequence of SourceLocations.
///
/// Optimized to produce small values when locations with the sequence are
Expand Down Expand Up @@ -149,14 +161,30 @@ class SourceLocationSequence::State {
operator SourceLocationSequence *() { return &Seq; }
};

inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
SourceLocationSequence *Seq) {
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
/// Encode \p Loc into a 64-bit raw value.
///
/// An invalid location encodes as 0. Otherwise the location is rebased by
/// subtracting \p BaseOffset, passed through encodeRaw(), and placed in the
/// low 32 bits; \p BaseModuleFileIndex is placed in the high 32 bits.
///
/// \param Loc                 The location to encode.
/// \param BaseOffset          Offset subtracted from \p Loc before encoding;
///                            asserted to be no larger than Loc's offset.
/// \param BaseModuleFileIndex Value stored in the high 32 bits.
/// \param Seq                 Not consulted by this implementation.
/// \returns The combined 64-bit encoding.
inline SourceLocationEncoding::RawLocEncoding
SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
                               unsigned BaseModuleFileIndex,
                               SourceLocationSequence *Seq) {
  // The invalid sentinel always encodes as 0.
  if (Loc.isInvalid())
    return 0;

  // Rebase the location so that it is relative to BaseOffset.
  assert(Loc.getOffset() >= BaseOffset);
  const SourceLocation Rebased = Loc.getLocWithOffset(-BaseOffset);

  // The encoded location must fit in the low 32 bits.
  const RawLocEncoding LocBits = encodeRaw(Rebased.getRawEncoding());
  assert(LocBits < ((RawLocEncoding)1 << 32));

  // The module file index occupies the high 32 bits.
  assert(BaseModuleFileIndex < ((RawLocEncoding)1 << 32));
  const RawLocEncoding FileBits = (RawLocEncoding)BaseModuleFileIndex << 32;
  return LocBits | FileBits;
}
inline SourceLocation
SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
return Seq ? Seq->decode(Encoded)
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
/// Decode a 64-bit raw value produced by encode().
///
/// The high 32 bits are returned as the module file index (0 stands for the
/// current module file, per the layout described at the top of this file).
/// The low 32 bits are passed through decodeRaw() and turned back into a
/// SourceLocation.
///
/// \param Encoded The raw value to decode.
/// \param Seq     Not consulted by this implementation.
/// \returns The decoded location paired with the module file index. The
///          location is still relative to the base offset used by encode().
inline std::pair<SourceLocation, unsigned>
SourceLocationEncoding::decode(RawLocEncoding Encoded,
                               SourceLocationSequence *Seq) {
  unsigned ModuleFileIndex = Encoded >> 32;

  // Keep exactly the low 32 bits: encode() asserts that the location part is
  // below 2^32, and bit 32 upward belongs to the module file index. A wider
  // mask would let the lowest index bit leak into the location payload.
  Encoded &= ((RawLocEncoding)1 << 32) - 1;
  SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));

  return {Loc, ModuleFileIndex};
}

} // namespace clang
Expand Down
2 changes: 0 additions & 2 deletions clang/lib/Frontend/ASTUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2373,8 +2373,6 @@ bool ASTUnit::serialize(raw_ostream &OS) {
return serializeUnit(Writer, Buffer, getSema(), OS);
}

using SLocRemap = ContinuousRangeMap<unsigned, int, 2>;

void ASTUnit::TranslateStoredDiagnostics(
FileManager &FileMgr,
SourceManager &SrcMgr,
Expand Down
Loading

0 comments on commit 8d4e349

Please sign in to comment.