From 8d4e349710f4ca84a7ad2dd8aa71afa50b730dbb Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 18 Mar 2024 08:36:55 +0800 Subject: [PATCH] [Modules] No transitive source location change --- clang/include/clang/Basic/SourceLocation.h | 1 + .../include/clang/Serialization/ASTBitCodes.h | 56 ++++++------ clang/include/clang/Serialization/ASTReader.h | 54 +++++++----- clang/include/clang/Serialization/ASTWriter.h | 4 + .../include/clang/Serialization/ModuleFile.h | 4 - .../Serialization/SourceLocationEncoding.h | 78 +++++++++++------ clang/lib/Frontend/ASTUnit.cpp | 2 - clang/lib/Serialization/ASTReader.cpp | 86 +++++++------------ clang/lib/Serialization/ASTReaderDecl.cpp | 2 +- clang/lib/Serialization/ASTWriter.cpp | 41 +++++++-- clang/lib/Serialization/ASTWriterDecl.cpp | 8 +- clang/lib/Serialization/ModuleFile.cpp | 1 - .../no-transitive-source-location-change.cppm | 69 +++++++++++++++ clang/test/Modules/pr61067.cppm | 25 ------ .../SourceLocationEncodingTest.cpp | 66 ++------------ 15 files changed, 264 insertions(+), 233 deletions(-) create mode 100644 clang/test/Modules/no-transitive-source-location-change.cppm diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h index 00b1e0fa855b7a..7a0f5ba8d1270b 100644 --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -90,6 +90,7 @@ class SourceLocation { friend class ASTWriter; friend class SourceManager; friend struct llvm::FoldingSetTrait; + friend class SourceLocationEncoding; public: using UIntTy = uint32_t; diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index f31efa5117f0d1..628ce03572fea6 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -22,6 +22,7 @@ #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Serialization/SourceLocationEncoding.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Bitstream/BitCodes.h" #include @@ -175,45 +176,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1; /// Source range/offset of a preprocessed entity. struct PPEntityOffset { + using RawLocEncoding = SourceLocationEncoding::RawLocEncoding; + /// Raw source location of beginning of range. - SourceLocation::UIntTy Begin; + RawLocEncoding Begin; /// Raw source location of end of range. - SourceLocation::UIntTy End; + RawLocEncoding End; /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase. uint32_t BitOffset; - PPEntityOffset(SourceRange R, uint32_t BitOffset) - : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()), - BitOffset(BitOffset) {} - - SourceLocation getBegin() const { - return SourceLocation::getFromRawEncoding(Begin); - } + PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset) + : Begin(Begin), End(End), BitOffset(BitOffset) {} - SourceLocation getEnd() const { - return SourceLocation::getFromRawEncoding(End); - } + RawLocEncoding getBegin() const { return Begin; } + RawLocEncoding getEnd() const { return End; } }; /// Source range of a skipped preprocessor region struct PPSkippedRange { + using RawLocEncoding = SourceLocationEncoding::RawLocEncoding; + /// Raw source location of beginning of range. - SourceLocation::UIntTy Begin; + RawLocEncoding Begin; /// Raw source location of end of range. - SourceLocation::UIntTy End; + RawLocEncoding End; - PPSkippedRange(SourceRange R) - : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) { - } + PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End) + : Begin(Begin), End(End) {} - SourceLocation getBegin() const { - return SourceLocation::getFromRawEncoding(Begin); - } - SourceLocation getEnd() const { - return SourceLocation::getFromRawEncoding(End); - } + RawLocEncoding getBegin() const { return Begin; } + RawLocEncoding getEnd() const { return End; } }; /// Offset in the AST file. Use splitted 64-bit integer into low/high @@ -239,8 +233,10 @@ struct UnderalignedInt64 { /// Source location and bit offset of a declaration. struct DeclOffset { + using RawLocEncoding = SourceLocationEncoding::RawLocEncoding; + /// Raw source location. - SourceLocation::UIntTy Loc = 0; + RawLocEncoding RawLoc = 0; /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep /// structure alignment 32-bit and avoid padding gap because undefined @@ -248,17 +244,15 @@ struct DeclOffset { UnderalignedInt64 BitOffset; DeclOffset() = default; - DeclOffset(SourceLocation Loc, uint64_t BitOffset, - uint64_t DeclTypesBlockStartOffset) { - setLocation(Loc); + DeclOffset(RawLocEncoding RawLoc, uint64_t BitOffset, + uint64_t DeclTypesBlockStartOffset) + : RawLoc(RawLoc) { setBitOffset(BitOffset, DeclTypesBlockStartOffset); } - void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); } + void setRawLoc(RawLocEncoding Loc) { RawLoc = Loc; } - SourceLocation getLocation() const { - return SourceLocation::getFromRawEncoding(Loc); - } + RawLocEncoding getRawLoc() const { return RawLoc; } void setBitOffset(uint64_t Offset, const uint64_t DeclTypesBlockStartOffset) { BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset); diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 370d8037a4da17..017c6b76a91495 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -696,7 +696,7 @@ class ASTReader /// Mapping from global submodule IDs to the module file in which the /// submodule resides along with the offset that should be added to the /// global submodule ID to produce a local ID. - GlobalSubmoduleMapType GlobalSubmoduleMap; + mutable GlobalSubmoduleMapType GlobalSubmoduleMap; /// A set of hidden declarations. using HiddenNames = SmallVector; @@ -942,6 +942,12 @@ class ASTReader /// Sema tracks these to emit deferred diags. llvm::SmallSetVector DeclsToCheckForDeferredDiags; + /// The module files imported by different module files. Indirectly imported + /// module files are included too. The information comes from + /// ReadModuleOffsetMap(ModuleFile&). + mutable llvm::DenseMap> + ImportedModuleFiles; + private: struct ImportedSubmodule { serialization::SubmoduleID ID; @@ -1761,6 +1767,7 @@ class ASTReader /// Retrieve the module manager. ModuleManager &getModuleManager() { return ModuleMgr; } + const ModuleManager &getModuleManager() const { return ModuleMgr; } /// Retrieve the preprocessor. Preprocessor &getPreprocessor() const { return PP; } @@ -2170,8 +2177,8 @@ class ASTReader /// Retrieve the global submodule ID given a module and its local ID /// number. - serialization::SubmoduleID - getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID); + serialization::SubmoduleID getGlobalSubmoduleID(ModuleFile &M, + unsigned LocalID) const; /// Retrieve the submodule that corresponds to a global submodule ID. /// @@ -2184,7 +2191,7 @@ class ASTReader /// Retrieve the module file with a given local ID within the specified /// ModuleFile. - ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID); + ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID) const; /// Get an ID for the given module file. unsigned getModuleFileID(ModuleFile *M); @@ -2220,40 +2227,47 @@ class ASTReader return Sema::AlignPackInfo::getFromRawEncoding(Raw); } + using RawLocEncoding = SourceLocationEncoding::RawLocEncoding; + /// Read a source location from raw form and return it in its /// originating module file's source location space. - SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw, - LocSeq *Seq = nullptr) const { + std::pair + ReadUntranslatedSourceLocation(RawLocEncoding Raw, + LocSeq *Seq = nullptr) const { return SourceLocationEncoding::decode(Raw, Seq); } /// Read a source location from raw form. - SourceLocation ReadSourceLocation(ModuleFile &ModuleFile, - SourceLocation::UIntTy Raw, - LocSeq *Seq = nullptr) const { - SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq); - return TranslateSourceLocation(ModuleFile, Loc); + SourceLocation ReadRawSourceLocation(ModuleFile &MF, RawLocEncoding Raw, + LocSeq *Seq = nullptr) const { + if (!MF.ModuleOffsetMap.empty()) + ReadModuleOffsetMap(MF); + + auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq); + ModuleFile *ModuleFileHomingLoc = + ModuleFileIndex ? ImportedModuleFiles[&MF][ModuleFileIndex - 1] : &MF; + return TranslateSourceLocation(*ModuleFileHomingLoc, Loc); } /// Translate a source location from another module file's source /// location space into ours. SourceLocation TranslateSourceLocation(ModuleFile &ModuleFile, SourceLocation Loc) const { - if (!ModuleFile.ModuleOffsetMap.empty()) - ReadModuleOffsetMap(ModuleFile); - assert(ModuleFile.SLocRemap.find(Loc.getOffset()) != - ModuleFile.SLocRemap.end() && - "Cannot find offset to remap."); - SourceLocation::IntTy Remap = - ModuleFile.SLocRemap.find(Loc.getOffset())->second; - return Loc.getLocWithOffset(Remap); + if (Loc.isInvalid()) + return Loc; + + // It implies that the Loc is already translated. + if (SourceMgr.isLoadedSourceLocation(Loc)) + return Loc; + + return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2); } /// Read a source location. SourceLocation ReadSourceLocation(ModuleFile &ModuleFile, const RecordDataImpl &Record, unsigned &Idx, LocSeq *Seq = nullptr) { - return ReadSourceLocation(ModuleFile, Record[Idx++], Seq); + return ReadRawSourceLocation(ModuleFile, Record[Idx++], Seq); } /// Read a FileID. diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 3ed9803fa3745b..70bf204ed598ef 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -648,6 +648,10 @@ class ASTWriter : public ASTDeserializationListener, void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, LocSeq *Seq = nullptr); + /// Return the raw encodings for source locations. + SourceLocationEncoding::RawLocEncoding + getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr); + /// Emit a source range. void AddSourceRange(SourceRange Range, RecordDataImpl &Record, LocSeq *Seq = nullptr); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index bc0aa89966c2b4..ea24b44e5e411b 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -295,10 +295,6 @@ class ModuleFile { /// AST file. const uint32_t *SLocEntryOffsets = nullptr; - /// Remapping table for source locations in this module. - ContinuousRangeMap - SLocRemap; - // === Identifiers === /// The number of identifiers in this AST file. diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h index 9bb0dbe2e4d6f6..3925b08c1837ed 100644 --- a/clang/include/clang/Serialization/SourceLocationEncoding.h +++ b/clang/include/clang/Serialization/SourceLocationEncoding.h @@ -6,23 +6,26 @@ // //===----------------------------------------------------------------------===// // -// Source locations are stored pervasively in the AST, making up a third of -// the size of typical serialized files. Storing them efficiently is important. +// We wish to encode the SourceLocation from other module file not dependent +// on the other module file. So that the source location changes from other +// module file may not affect the contents of the current module file. Then the +// users don't need to recompile the whole project due to a new line in a module +// unit in the root of the dependency graph. // -// We use integers optimized by VBR-encoding, because: -// - when abbreviations cannot be used, VBR6 encoding is our only choice -// - in the worst case a SourceLocation can be ~any 32-bit number, but in -// practice they are highly predictable +// To achieve this, we need to encode the index of the module file into the +// encoding of the source location. The encoding of the source location may be: // -// We encode the integer so that likely values encode as small numbers that -// turn into few VBR chunks: -// - the invalid sentinel location is a very common value: it encodes as 0 -// - the "macro or not" bit is stored at the bottom of the integer -// (rather than at the top, as in memory), so macro locations can have -// small representations. -// - related locations (e.g. of a left and right paren pair) are usually -// similar, so when encoding a sequence of locations we store only -// differences between successive elements. +// |-----------------------|-----------------------| +// | A | B | C | +// +// * A: 32 bit. The index of the module file in the module manager + 1. The +1 +// here +// is necessary since we wish 0 stands for the current module file. +// * B: 31 bit. The offset of the source location to the module file containing +// it. +// * C: The macro bit. We rotate it to the lowest bit so that we can save some +// space +// in case the index of the module file is 0. // //===----------------------------------------------------------------------===// @@ -52,11 +55,20 @@ class SourceLocationEncoding { friend SourceLocationSequence; public: - static uint64_t encode(SourceLocation Loc, - SourceLocationSequence * = nullptr); - static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr); + using RawLocEncoding = uint64_t; + + static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset, + unsigned BaseModuleFileIndex, + SourceLocationSequence * = nullptr); + static std::pair + decode(RawLocEncoding, SourceLocationSequence * = nullptr); }; +/// TODO: Remove SourceLocationSequence since it is not used now. +/// Since we will put the index for ModuleFile in the high bits in the encodings +/// for source locations, it is meaningless to reduce the size of source +/// locations. +/// /// Serialized encoding of a sequence of SourceLocations. /// /// Optimized to produce small values when locations with the sequence are @@ -149,14 +161,30 @@ class SourceLocationSequence::State { operator SourceLocationSequence *() { return &Seq; } }; -inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc, - SourceLocationSequence *Seq) { - return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); +inline SourceLocationEncoding::RawLocEncoding +SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset, + unsigned BaseModuleFileIndex, + SourceLocationSequence *Seq) { + if (Loc.isInvalid()) + return 0; + + assert(Loc.getOffset() >= BaseOffset); + Loc = Loc.getLocWithOffset(-BaseOffset); + RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding()); + assert(Encoded < ((RawLocEncoding)1 << 32)); + + assert(BaseModuleFileIndex < ((RawLocEncoding)1 << 32)); + Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32; + return Encoded; } -inline SourceLocation -SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) { - return Seq ? Seq->decode(Encoded) - : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); +inline std::pair +SourceLocationEncoding::decode(RawLocEncoding Encoded, + SourceLocationSequence *Seq) { + unsigned ModuleFileIndex = Encoded >> 32; + Encoded &= ((RawLocEncoding)1 << 33) - 1; + SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); + + return {Loc, ModuleFileIndex}; } } // namespace clang diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 3610a08831e79a..1c655260b09eb5 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -2373,8 +2373,6 @@ bool ASTUnit::serialize(raw_ostream &OS) { return serializeUnit(Writer, Buffer, getSema(), OS); } -using SLocRemap = ContinuousRangeMap; - void ASTUnit::TranslateStoredDiagnostics( FileManager &FileMgr, SourceManager &SrcMgr, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 28e8d27fef08c6..625666ead0fb49 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1645,7 +1645,7 @@ bool ASTReader::ReadSLocEntry(int ID) { if (!File) return true; - SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]); + SourceLocation IncludeLoc = ReadRawSourceLocation(*F, Record[1]); if (IncludeLoc.isInvalid() && F->Kind != MK_MainFile) { // This is the module's main file. IncludeLoc = getImportLocation(F); @@ -1687,7 +1687,7 @@ bool ASTReader::ReadSLocEntry(int ID) { unsigned Offset = Record[0]; SrcMgr::CharacteristicKind FileCharacter = (SrcMgr::CharacteristicKind)Record[2]; - SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]); + SourceLocation IncludeLoc = ReadRawSourceLocation(*F, Record[1]); if (IncludeLoc.isInvalid() && F->isModule()) { IncludeLoc = getImportLocation(F); } @@ -1707,9 +1707,9 @@ bool ASTReader::ReadSLocEntry(int ID) { case SM_SLOC_EXPANSION_ENTRY: { LocSeq::State Seq; - SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq); - SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq); - SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq); + SourceLocation SpellingLoc = ReadRawSourceLocation(*F, Record[1], Seq); + SourceLocation ExpansionBegin = ReadRawSourceLocation(*F, Record[2], Seq); + SourceLocation ExpansionEnd = ReadRawSourceLocation(*F, Record[3], Seq); SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd, Record[5], Record[4], ID, BaseOffset + Record[0]); @@ -3038,8 +3038,10 @@ ASTReader::ReadControlBlock(ModuleFile &F, // The import location will be the local one for now; we will adjust // all import locations of module imports after the global source // location info are setup, in ReadAST. - SourceLocation ImportLoc = + auto [ImportLoc, ImportModuleFileIndex] = ReadUntranslatedSourceLocation(Record[Idx++]); + // The import location must belong to the current module file itself. + assert(ImportModuleFileIndex == 0); off_t StoredSize = !IsImportingStdCXXModule ? (off_t)Record[Idx++] : 0; time_t StoredModTime = !IsImportingStdCXXModule ? (time_t)Record[Idx++] : 0; @@ -3658,13 +3660,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, std::make_pair(SourceManager::MaxLoadedOffset - F.SLocEntryBaseOffset - SLocSpaceSize,&F)); - // Initialize the remapping table. - // Invalid stays invalid. - F.SLocRemap.insertOrReplace(std::make_pair(0U, 0)); - // This module. Base was 2 when being compiled. - F.SLocRemap.insertOrReplace(std::make_pair( - 2U, static_cast(F.SLocEntryBaseOffset - 2))); - TotalNumSLocEntries += F.LocalNumSLocEntries; break; } @@ -3941,7 +3936,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, if (Record.size() != 1) return llvm::createStringError(std::errc::illegal_byte_sequence, "invalid pragma optimize record"); - OptimizeOffPragmaLocation = ReadSourceLocation(F, Record[0]); + OptimizeOffPragmaLocation = ReadRawSourceLocation(F, Record[0]); break; case MSSTRUCT_PRAGMA_OPTIONS: @@ -3957,7 +3952,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, std::errc::illegal_byte_sequence, "invalid pragma pointers to members record"); PragmaMSPointersToMembersState = Record[0]; - PointersToMembersPragmaLocation = ReadSourceLocation(F, Record[1]); + PointersToMembersPragmaLocation = ReadRawSourceLocation(F, Record[1]); break; case UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES: @@ -3978,7 +3973,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, return llvm::createStringError(std::errc::illegal_byte_sequence, "invalid pragma pack record"); PragmaAlignPackCurrentValue = ReadAlignPackInfo(Record[0]); - PragmaAlignPackCurrentLocation = ReadSourceLocation(F, Record[1]); + PragmaAlignPackCurrentLocation = ReadRawSourceLocation(F, Record[1]); unsigned NumStackEntries = Record[2]; unsigned Idx = 3; // Reset the stack when importing a new module. @@ -3986,8 +3981,8 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, for (unsigned I = 0; I < NumStackEntries; ++I) { PragmaAlignPackStackEntry Entry; Entry.Value = ReadAlignPackInfo(Record[Idx++]); - Entry.Location = ReadSourceLocation(F, Record[Idx++]); - Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]); + Entry.Location = ReadRawSourceLocation(F, Record[Idx++]); + Entry.PushLocation = ReadRawSourceLocation(F, Record[Idx++]); PragmaAlignPackStrings.push_back(ReadString(Record, Idx)); Entry.SlotLabel = PragmaAlignPackStrings.back(); PragmaAlignPackStack.push_back(Entry); @@ -4000,7 +3995,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, return llvm::createStringError(std::errc::illegal_byte_sequence, "invalid pragma float control record"); FpPragmaCurrentValue = FPOptionsOverride::getFromOpaqueInt(Record[0]); - FpPragmaCurrentLocation = ReadSourceLocation(F, Record[1]); + FpPragmaCurrentLocation = ReadRawSourceLocation(F, Record[1]); unsigned NumStackEntries = Record[2]; unsigned Idx = 3; // Reset the stack when importing a new module. @@ -4008,8 +4003,8 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, for (unsigned I = 0; I < NumStackEntries; ++I) { FpPragmaStackEntry Entry; Entry.Value = FPOptionsOverride::getFromOpaqueInt(Record[Idx++]); - Entry.Location = ReadSourceLocation(F, Record[Idx++]); - Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]); + Entry.Location = ReadRawSourceLocation(F, Record[Idx++]); + Entry.PushLocation = ReadRawSourceLocation(F, Record[Idx++]); FpPragmaStrings.push_back(ReadString(Record, Idx)); Entry.SlotLabel = FpPragmaStrings.back(); FpPragmaStack.push_back(Entry); @@ -4033,18 +4028,7 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { const unsigned char *DataEnd = Data + F.ModuleOffsetMap.size(); F.ModuleOffsetMap = StringRef(); - // If we see this entry before SOURCE_LOCATION_OFFSETS, add placeholders. - if (F.SLocRemap.find(0) == F.SLocRemap.end()) { - F.SLocRemap.insert(std::make_pair(0U, 0)); - F.SLocRemap.insert(std::make_pair(2U, 1)); - } - - // Continuous range maps we may be updating in our module. - using SLocRemapBuilder = - ContinuousRangeMap::Builder; using RemapBuilder = ContinuousRangeMap::Builder; - SLocRemapBuilder SLocRemap(F.SLocRemap); RemapBuilder IdentifierRemap(F.IdentifierRemap); RemapBuilder MacroRemap(F.MacroRemap); RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap); @@ -4053,6 +4037,9 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { RemapBuilder DeclRemap(F.DeclRemap); RemapBuilder TypeRemap(F.TypeRemap); + auto &ImportedModuleVector = ImportedModuleFiles[&F]; + assert(ImportedModuleVector.empty()); + while (Data < DataEnd) { // FIXME: Looking up dependency modules by filename is horrible. Let's // start fixing this with prebuilt, explicit and implicit modules and see @@ -4076,8 +4063,8 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { return; } - SourceLocation::UIntTy SLocOffset = - endian::readNext(Data); + ImportedModuleVector.push_back(OM); + uint32_t IdentifierIDOffset = endian::readNext(Data); uint32_t MacroIDOffset = @@ -4101,13 +4088,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { static_cast(BaseOffset - Offset))); }; - constexpr SourceLocation::UIntTy SLocNone = - std::numeric_limits::max(); - if (SLocOffset != SLocNone) - SLocRemap.insert(std::make_pair( - SLocOffset, static_cast( - OM->SLocEntryBaseOffset - SLocOffset))); - mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap); mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap); mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID, @@ -5768,7 +5748,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]); SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]); Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++]; - SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]); + SourceLocation DefinitionLoc = ReadRawSourceLocation(F, Record[Idx++]); bool IsFramework = Record[Idx++]; bool IsExplicit = Record[Idx++]; bool IsSystem = Record[Idx++]; @@ -6243,8 +6223,8 @@ SourceRange ASTReader::ReadSkippedRange(unsigned GlobalIndex) { unsigned LocalIndex = GlobalIndex - M->BasePreprocessedSkippedRangeID; assert(LocalIndex < M->NumPreprocessedSkippedRanges); PPSkippedRange RawRange = M->PreprocessedSkippedRangeOffsets[LocalIndex]; - SourceRange Range(TranslateSourceLocation(*M, RawRange.getBegin()), - TranslateSourceLocation(*M, RawRange.getEnd())); + SourceRange Range(ReadRawSourceLocation(*M, RawRange.getBegin()), + ReadRawSourceLocation(*M, RawRange.getEnd())); assert(Range.isValid()); return Range; } @@ -6280,8 +6260,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { return nullptr; // Read the record. - SourceRange Range(TranslateSourceLocation(M, PPOffs.getBegin()), - TranslateSourceLocation(M, PPOffs.getEnd())); + SourceRange Range(ReadRawSourceLocation(M, PPOffs.getBegin()), + ReadRawSourceLocation(M, PPOffs.getEnd())); PreprocessingRecord &PPRec = *PP.getPreprocessingRecord(); StringRef Blob; RecordData Record; @@ -6393,7 +6373,7 @@ struct PPEntityComp { } SourceLocation getLoc(const PPEntityOffset &PPE) const { - return Reader.TranslateSourceLocation(M, PPE.getBegin()); + return Reader.ReadRawSourceLocation(M, PPE.getBegin()); } }; @@ -6437,7 +6417,7 @@ PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc, PPI = First; std::advance(PPI, Half); if (SourceMgr.isBeforeInTranslationUnit( - TranslateSourceLocation(M, PPI->getEnd()), Loc)) { + ReadRawSourceLocation(M, PPI->getEnd()), Loc)) { First = PPI; ++First; Count = Count - Half - 1; @@ -6478,7 +6458,7 @@ std::optional ASTReader::isPreprocessedEntityInFileID(unsigned Index, unsigned LocalIndex = PPInfo.second; const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex]; - SourceLocation Loc = TranslateSourceLocation(M, PPOffs.getBegin()); + SourceLocation Loc = ReadRawSourceLocation(M, PPOffs.getBegin()); if (Loc.isInvalid()) return false; @@ -6622,7 +6602,7 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { while (NumLocations--) { assert(Idx < Record.size() && "Invalid data, missing pragma diagnostic states"); - SourceLocation Loc = ReadSourceLocation(F, Record[Idx++]); + SourceLocation Loc = ReadRawSourceLocation(F, Record[Idx++]); auto IDAndOffset = SourceMgr.getDecomposedLoc(Loc); assert(IDAndOffset.first.isValid() && "invalid FileID for transition"); assert(IDAndOffset.second == 0 && "not a start location for a FileID"); @@ -6644,7 +6624,7 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { // Read the final state. assert(Idx < Record.size() && "Invalid data, missing final pragma diagnostic state"); - SourceLocation CurStateLoc = ReadSourceLocation(F, Record[Idx++]); + SourceLocation CurStateLoc = ReadRawSourceLocation(F, Record[Idx++]); auto *CurState = ReadDiagState(*FirstState, false); if (!F.isModule()) { @@ -8944,7 +8924,7 @@ MacroID ASTReader::getGlobalMacroID(ModuleFile &M, unsigned LocalID) { } serialization::SubmoduleID -ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) { +ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) const { if (LocalID < NUM_PREDEF_SUBMODULE_IDS) return LocalID; @@ -8977,7 +8957,7 @@ Module *ASTReader::getModule(unsigned ID) { return getSubmodule(ID); } -ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &M, unsigned ID) { +ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &M, unsigned ID) const { if (ID & 1) { // It's a module, look it up by submodule ID. auto I = GlobalSubmoduleMap.find(getGlobalSubmoduleID(M, ID >> 1)); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index a22f760408c634..f3192b3d67508c 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -3246,7 +3246,7 @@ ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) { ModuleFile *M = I->second; const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; - Loc = TranslateSourceLocation(*M, DOffs.getLocation()); + Loc = ReadRawSourceLocation(*M, DOffs.getRawLoc()); return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset)); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 221409d011a38c..95fd012f2aa63f 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2657,8 +2657,10 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; assert((Offset >> 32) == 0 && "Preprocessed entity offset too large"); - PreprocessedEntityOffsets.push_back( - PPEntityOffset(getAdjustedRange((*E)->getSourceRange()), Offset)); + SourceRange R = getAdjustedRange((*E)->getSourceRange()); + PreprocessedEntityOffsets.emplace_back( + getRawSourceLocationEncoding(R.getBegin()), + getRawSourceLocationEncoding(R.getEnd()), Offset); if (auto *MD = dyn_cast(*E)) { // Record this macro definition's ID. @@ -2725,7 +2727,9 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, std::vector SerializedSkippedRanges; SerializedSkippedRanges.reserve(SkippedRanges.size()); for (auto const& Range : SkippedRanges) - SerializedSkippedRanges.emplace_back(Range); + SerializedSkippedRanges.emplace_back( + getRawSourceLocationEncoding(Range.getBegin()), + getRawSourceLocationEncoding(Range.getEnd())); using namespace llvm; auto Abbrev = std::make_shared(); @@ -2891,8 +2895,8 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { ParentID = SubmoduleIDs[Mod->Parent]; } - uint64_t DefinitionLoc = - SourceLocationEncoding::encode(getAdjustedLocation(Mod->DefinitionLoc)); + SourceLocationEncoding::RawLocEncoding DefinitionLoc = + getRawSourceLocationEncoding(getAdjustedLocation(Mod->DefinitionLoc)); // Emit the definition of the block. { @@ -5107,7 +5111,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, // These values should be unique within a chain, since they will be read // as keys into ContinuousRangeMaps. - writeBaseIDOrNone(M.SLocEntryBaseOffset, M.LocalNumSLocEntries); writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers); writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros); writeBaseIDOrNone(M.BasePreprocessedEntityID, @@ -5556,10 +5559,34 @@ void ASTWriter::AddFileID(FileID FID, RecordDataImpl &Record) { Record.push_back(getAdjustedFileID(FID).getOpaqueValue()); } +SourceLocationEncoding::RawLocEncoding +ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) { + unsigned BaseOffset = 0; + unsigned ModuleFileIndex = 0; + + // See SourceLocationEncoding.h for the encoding details. + if (Context->getSourceManager().isLoadedSourceLocation(Loc) && + Loc.isValid()) { + assert(getChain()); + auto SLocMapI = getChain()->GlobalSLocOffsetMap.find( + SourceManager::MaxLoadedOffset - Loc.getOffset() - 1); + assert(SLocMapI != getChain()->GlobalSLocOffsetMap.end() && + "Corrupted global sloc offset map"); + ModuleFile *F = SLocMapI->second; + BaseOffset = F->SLocEntryBaseOffset - 2; + // 0 means the location is not loaded. So we need to add 1 to the index to + // make it clear. + ModuleFileIndex = F->Index + 1; + assert(&getChain()->getModuleManager()[F->Index] == F); + } + + return SourceLocationEncoding::encode(Loc, BaseOffset, ModuleFileIndex, Seq); +} + void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, SourceLocationSequence *Seq) { Loc = getAdjustedLocation(Loc); - Record.push_back(SourceLocationEncoding::encode(Loc, Seq)); + Record.push_back(getRawSourceLocationEncoding(Loc, Seq)); } void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record, diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 86f64bf2a24250..b113c433c4223c 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2777,14 +2777,16 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) { // Record the offset for this declaration SourceLocation Loc = D->getLocation(); + SourceLocationEncoding::RawLocEncoding RawLoc = + getRawSourceLocationEncoding(getAdjustedLocation(Loc)); + unsigned Index = ID - FirstDeclID; if (DeclOffsets.size() == Index) - DeclOffsets.emplace_back(getAdjustedLocation(Loc), Offset, - DeclTypesBlockStartOffset); + DeclOffsets.emplace_back(RawLoc, Offset, DeclTypesBlockStartOffset); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? DeclOffsets.resize(Index+1); - DeclOffsets[Index].setLocation(getAdjustedLocation(Loc)); + DeclOffsets[Index].setRawLoc(RawLoc); DeclOffsets[Index].setBitOffset(Offset, DeclTypesBlockStartOffset); } else { llvm_unreachable("declarations should be emitted in ID order"); diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp index db896fd361159d..2c42d33a8f5dd3 100644 --- a/clang/lib/Serialization/ModuleFile.cpp +++ b/clang/lib/Serialization/ModuleFile.cpp @@ -59,7 +59,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() { // Remapping tables. llvm::errs() << " Base source location offset: " << SLocEntryBaseOffset << '\n'; - dumpLocalRemap("Source location offset local -> global map", SLocRemap); llvm::errs() << " Base identifier ID: " << BaseIdentifierID << '\n' << " Number of identifiers: " << LocalNumIdentifiers << '\n'; diff --git a/clang/test/Modules/no-transitive-source-location-change.cppm b/clang/test/Modules/no-transitive-source-location-change.cppm new file mode 100644 index 00000000000000..b876fbb6d9f0cf --- /dev/null +++ b/clang/test/Modules/no-transitive-source-location-change.cppm @@ -0,0 +1,69 @@ +// Testing that adding a new line in a module interface unit won't cause the BMI +// of consuming module unit changes. +// +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: cd %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm + +// +// RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-module-interface -o %t/A.v1.pcm +// +// The BMI may not be the same since the source location differs. +// RUN: not diff %t/A.pcm %t/A.v1.pcm &> /dev/null +// +// The BMI of B shouldn't change since all the locations remain the same. +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \ +// RUN: -o %t/B.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \ +// RUN: -o %t/B.v1.pcm +// RUN: diff %t/B.v1.pcm %t/B.pcm &> /dev/null +// +// The BMI of C may change since the locations for instantiations changes. +// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \ +// RUN: -o %t/C.pcm +// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \ +// RUN: -o %t/C.v1.pcm +// RUN: not diff %t/C.v1.pcm %t/C.pcm &> /dev/null + +//--- A.cppm +export module A; +export template +struct C { + T func() { + return T(43); + } +}; +export int funcA() { + return 43; +} + +//--- A.v1.cppm +export module A; + +export template +struct C { + T func() { + return T(43); + } +}; +export int funcA() { + return 43; +} + +//--- B.cppm +export module B; +import A; + +export int funcB() { + return funcA(); +} + +//--- C.cppm +export module C; +import A; +export void testD() { + C c; + c.func(); +} diff --git a/clang/test/Modules/pr61067.cppm b/clang/test/Modules/pr61067.cppm index b7f9d22e253854..f853491fe76bf7 100644 --- a/clang/test/Modules/pr61067.cppm +++ b/clang/test/Modules/pr61067.cppm @@ -9,22 +9,6 @@ // RUN: -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/b.pcm // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \ // RUN: -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \ -// RUN: -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp - -// Test again with reduced BMI -// RUN: rm -rf %t -// RUN: mkdir -p %t -// RUN: split-file %s %t -// -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm \ -// RUN: -emit-reduced-module-interface -o %t/a.pcm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cppm \ -// RUN: -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/b.pcm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \ -// RUN: -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \ -// RUN: -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp //--- a.cppm export module a; @@ -43,12 +27,3 @@ void b() { } // CHECK: define{{.*}}linkonce_odr{{.*}}@_ZW1aeqS_1aS0_( - -//--- c.cpp -import a; - -int c() { - (void)(a() == a()); -} - -// CHECK: define{{.*}}linkonce_odr{{.*}}@_ZW1aeqS_1aS0_( diff --git a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp index 141da4c27f8d0b..1c929ae8cb8272 100644 --- a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp +++ b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp @@ -16,49 +16,22 @@ using namespace llvm; using namespace clang; namespace { -using LocSeq = SourceLocationSequence; - // Convert a single source location into encoded form and back. // If ExpectedEncoded is provided, verify the encoded value too. // Loc is the raw (in-memory) form of SourceLocation. void roundTrip(SourceLocation::UIntTy Loc, std::optional ExpectedEncoded = std::nullopt) { - uint64_t ActualEncoded = - SourceLocationEncoding::encode(SourceLocation::getFromRawEncoding(Loc)); + uint64_t ActualEncoded = SourceLocationEncoding::encode( + SourceLocation::getFromRawEncoding(Loc), /*BaseOffset=*/0, + /*BaseModuleFileIndex=*/0); if (ExpectedEncoded) { ASSERT_EQ(ActualEncoded, *ExpectedEncoded) << "Encoding " << Loc; } SourceLocation::UIntTy DecodedEncoded = - SourceLocationEncoding::decode(ActualEncoded).getRawEncoding(); + SourceLocationEncoding::decode(ActualEncoded).first.getRawEncoding(); ASSERT_EQ(DecodedEncoded, Loc) << "Decoding " << ActualEncoded; } -// As above, but use sequence encoding for a series of locations. -void roundTrip(std::vector Locs, - std::vector ExpectedEncoded = {}) { - std::vector ActualEncoded; - { - LocSeq::State Seq; - for (auto L : Locs) - ActualEncoded.push_back(SourceLocationEncoding::encode( - SourceLocation::getFromRawEncoding(L), Seq)); - if (!ExpectedEncoded.empty()) { - ASSERT_EQ(ActualEncoded, ExpectedEncoded) - << "Encoding " << testing::PrintToString(Locs); - } - } - std::vector DecodedEncoded; - { - LocSeq::State Seq; - for (auto L : ActualEncoded) { - SourceLocation Loc = SourceLocationEncoding::decode(L, Seq); - DecodedEncoded.push_back(Loc.getRawEncoding()); - } - ASSERT_EQ(DecodedEncoded, Locs) - << "Decoding " << testing::PrintToString(ActualEncoded); - } -} - constexpr SourceLocation::UIntTy MacroBit = 1 << (sizeof(SourceLocation::UIntTy) * CHAR_BIT - 1); constexpr SourceLocation::UIntTy Big = MacroBit >> 1; @@ -75,33 +48,4 @@ TEST(SourceLocationEncoding, Individual) { roundTrip(MacroBit | (Big + 1)); } -TEST(SourceLocationEncoding, Sequence) { - roundTrip({1, 2, 3, 3, 2, 1}, - {2, // 1 - 5, // +2 (+1 of non-raw) - 5, // +2 - 1, // +0 - 4, // -2 - 4} // -2 - ); - roundTrip({100, 0, 100}, - {200, // 100 - 0, // 0 - 1} // +0 - ); - - roundTrip({1, Big}, {2, ((Big - 1) << 2) + 1}); - roundTrip({2, MacroBit | Big}, {4, ((Big - 1) << 2) - 1}); - - roundTrip({3, MacroBit | 5, MacroBit | 4, 3}, - {6, // 3 - 11, // +5 (+2 of non-raw + set macro bit) - 4, // -2 - 6} // -3 (-2 of non-raw, clear macro bit) - ); - - roundTrip( - {123 | MacroBit, 1, 9, Biggest, Big, Big + 1, 0, MacroBit | Big, 0}); -} - -} // namespace +} // namespace \ No newline at end of file