From 9841b09e27f32c26729247b2dc3160cf145fd066 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Tue, 20 Feb 2024 10:48:55 +0800 Subject: [PATCH] [Serialization] Record whether the ODR is skipped Close https://github.com/llvm/llvm-project/issues/80570. In https://github.com/llvm/llvm-project/commit/a0b6747804e46665ecfd00295b60432bfe1775b6, we skipped ODR checks for decls in GMF. Then it should be natural to skip storing the ODR values in BMI. Generally it should be fine as long as the writer and the reader keep consistent. However, the use of preamble in clangd shows the tricky part. For, ``` // test.cpp module; // any one off these is enough to crash clangd // #include // #include // #include // #include // #include // #include // probably many more // only ok with libc++, not the system provided libstdc++ 13.2.1 // these are ok export module test; ``` clangd will store the headers as preamble to speedup the parsing and the preamble reuses the serialization techniques. (Generally we'd call the preamble as PCH. However it is not true strictly. I've tested the PCH wouldn't be problematic.) However, the tricky part is that the preamble is not modules. It literally serialiaze and deserialize things. So before clangd parsing the above test module, clangd will serialize the headers into the preamble. Note that there is no concept like GMF now. So the ODR bits are stored. However, when clangd parse the file actually, the decls from preamble are thought as in GMF literally, then hte ODR bits are skipped. Then mismatch happens. To solve the problem, this patch adds another bit for decls to record whether or not the ODR bits are skipped. --- clang/lib/Serialization/ASTReaderDecl.cpp | 10 +- clang/lib/Serialization/ASTWriter.cpp | 6 +- clang/lib/Serialization/ASTWriterDecl.cpp | 15 +- clang/unittests/Serialization/CMakeLists.txt | 1 + .../PreambleInNamedModulesTest.cpp | 132 ++++++++++++++++++ 5 files changed, 154 insertions(+), 10 deletions(-) create mode 100644 clang/unittests/Serialization/PreambleInNamedModulesTest.cpp diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index ffba04f28782ea..d5309e3fc31f70 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -800,11 +800,12 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) { BitsUnpacker EnumDeclBits(Record.readInt()); ED->setNumPositiveBits(EnumDeclBits.getNextBits(/*Width=*/8)); ED->setNumNegativeBits(EnumDeclBits.getNextBits(/*Width=*/8)); + bool ShouldSkipCheckingODR = EnumDeclBits.getNextBit(); ED->setScoped(EnumDeclBits.getNextBit()); ED->setScopedUsingClassTag(EnumDeclBits.getNextBit()); ED->setFixed(EnumDeclBits.getNextBit()); - if (!shouldSkipCheckingODR(ED)) { + if (!ShouldSkipCheckingODR) { ED->setHasODRHash(true); ED->ODRHash = Record.readInt(); } @@ -1073,6 +1074,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { FD->setCachedLinkage((Linkage)FunctionDeclBits.getNextBits(/*Width=*/3)); FD->setStorageClass((StorageClass)FunctionDeclBits.getNextBits(/*Width=*/3)); + bool ShouldSkipCheckingODR = FunctionDeclBits.getNextBit(); FD->setInlineSpecified(FunctionDeclBits.getNextBit()); FD->setImplicitlyInline(FunctionDeclBits.getNextBit()); FD->setHasSkippedBody(FunctionDeclBits.getNextBit()); @@ -1102,7 +1104,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { if (FD->isExplicitlyDefaulted()) FD->setDefaultLoc(readSourceLocation()); - if (!shouldSkipCheckingODR(FD)) { + if (!ShouldSkipCheckingODR) { FD->ODRHash = Record.readInt(); FD->setHasODRHash(true); } @@ -1973,6 +1975,8 @@ void ASTDeclReader::ReadCXXDefinitionData( BitsUnpacker CXXRecordDeclBits = Record.readInt(); + bool ShouldSkipCheckingODR = CXXRecordDeclBits.getNextBit(); + #define FIELD(Name, Width, Merge) \ if (!CXXRecordDeclBits.canGetNextNBits(Width)) \ CXXRecordDeclBits.updateValue(Record.readInt()); \ @@ -1982,7 +1986,7 @@ void ASTDeclReader::ReadCXXDefinitionData( #undef FIELD // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) { + if (!ShouldSkipCheckingODR) { // Note: the caller has deserialized the IsLambda bit already. Data.ODRHash = Record.readInt(); Data.HasODRHash = true; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 740bec586a5e33..a9edc7e68b53b3 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6056,6 +6056,9 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { BitsPacker DefinitionBits; + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + DefinitionBits.addBit(ShouldSkipCheckingODR); + #define FIELD(Name, Width, Merge) \ if (!DefinitionBits.canWriteNextNBits(Width)) { \ Record->push_back(DefinitionBits); \ @@ -6069,11 +6072,10 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { Record->push_back(DefinitionBits); // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) { + if (!ShouldSkipCheckingODR) // getODRHash will compute the ODRHash if it has not been previously // computed. Record->push_back(D->getODRHash()); - } bool ModulesDebugInfo = Writer->Context->getLangOpts().ModulesDebugInfo && !D->isDependentType(); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index f224075643e998..e73800100e3ccf 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -488,13 +488,15 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) { BitsPacker EnumDeclBits; EnumDeclBits.addBits(D->getNumPositiveBits(), /*BitWidth=*/8); EnumDeclBits.addBits(D->getNumNegativeBits(), /*BitWidth=*/8); + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + EnumDeclBits.addBit(ShouldSkipCheckingODR); EnumDeclBits.addBit(D->isScoped()); EnumDeclBits.addBit(D->isScopedUsingClassTag()); EnumDeclBits.addBit(D->isFixed()); Record.push_back(EnumDeclBits); // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) + if (!ShouldSkipCheckingODR) Record.push_back(D->getODRHash()); if (MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo()) { @@ -678,6 +680,8 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { // FIXME: stable encoding FunctionDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), 3); FunctionDeclBits.addBits((uint32_t)D->getStorageClass(), /*BitWidth=*/3); + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + FunctionDeclBits.addBit(ShouldSkipCheckingODR); FunctionDeclBits.addBit(D->isInlineSpecified()); FunctionDeclBits.addBit(D->isInlined()); FunctionDeclBits.addBit(D->hasSkippedBody()); @@ -704,7 +708,7 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { Record.AddSourceLocation(D->getDefaultLoc()); // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) + if (!ShouldSkipCheckingODR) Record.push_back(D->getODRHash()); if (D->isDefaulted()) { @@ -2137,12 +2141,13 @@ getFunctionDeclAbbrev(serialization::DeclCode Code) { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 11)); // IDNS Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, - 27)); // Packed Function Bits: StorageClass, Inline, InlineSpecified, + 28)); // Packed Function Bits: StorageClass, Inline, InlineSpecified, // VirtualAsWritten, Pure, HasInheritedProto, HasWrittenProto, // Deleted, Trivial, TrivialForCall, Defaulted, ExplicitlyDefaulted, // IsIneligibleOrNotSelected, ImplicitReturnZero, Constexpr, // UsesSEHTry, SkippedBody, MultiVersion, LateParsed, - // FriendConstraintRefersToEnclosingTemplate, Linkage + // FriendConstraintRefersToEnclosingTemplate, Linkage, + // ShouldSkipCheckingODR Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LocEnd Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // ODRHash // This Array slurps the rest of the record. Fortunately we want to encode @@ -2269,7 +2274,7 @@ void ASTWriter::WriteDeclAbbrevs() { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddTypeRef Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // IntegerType Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getPromotionType - Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 19)); // Enum Decl Bits + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 20)); // Enum Decl Bits Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));// ODRHash Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InstantiatedMembEnum // DC diff --git a/clang/unittests/Serialization/CMakeLists.txt b/clang/unittests/Serialization/CMakeLists.txt index 10d7de970c643d..e7eebd0cb98239 100644 --- a/clang/unittests/Serialization/CMakeLists.txt +++ b/clang/unittests/Serialization/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_unittest(SerializationTests InMemoryModuleCacheTest.cpp ModuleCacheTest.cpp NoCommentsTest.cpp + PreambleInNamedModulesTest.cpp SourceLocationEncodingTest.cpp VarDeclConstantInitTest.cpp ) diff --git a/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp new file mode 100644 index 00000000000000..d26e1cb633654f --- /dev/null +++ b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp @@ -0,0 +1,132 @@ +//===- unittests/Serialization/PreambleInNamedModulesTest.cpp -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Frontend/PrecompiledPreamble.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace { + +class PreambleInNamedModulesTest : public ::testing::Test { + void SetUp() override { + ASSERT_FALSE(sys::fs::createUniqueDirectory("modules-test", TestDir)); + } + + void TearDown() override { sys::fs::remove_directories(TestDir); } + +public: + using PathType = SmallString<256>; + + PathType TestDir; + + void addFile(StringRef Path, StringRef Contents, PathType &AbsPath) { + ASSERT_FALSE(sys::path::is_absolute(Path)); + + AbsPath = TestDir; + sys::path::append(AbsPath, Path); + + ASSERT_FALSE( + sys::fs::create_directories(llvm::sys::path::parent_path(AbsPath))); + + std::error_code EC; + llvm::raw_fd_ostream OS(AbsPath, EC); + ASSERT_FALSE(EC); + OS << Contents; + } + + void addFile(StringRef Path, StringRef Contents) { + PathType UnusedAbsPath; + addFile(Path, Contents, UnusedAbsPath); + } +}; + +// Testing that the use of Preamble in named modules can work basically. +// See https://github.com/llvm/llvm-project/issues/80570 +TEST_F(PreambleInNamedModulesTest, BasicTest) { + addFile("foo.h", R"cpp( +enum class E { + A, + B, + C, + D +}; + )cpp"); + + PathType MainFilePath; + addFile("A.cppm", R"cpp( +module; +#include "foo.h" +export module A; +export using ::E; + )cpp", + MainFilePath); + + IntrusiveRefCntPtr Diags = + CompilerInstance::createDiagnostics(new DiagnosticOptions()); + IntrusiveRefCntPtr VFS = + llvm::vfs::createPhysicalFileSystem(); + + CreateInvocationOptions CIOpts; + CIOpts.Diags = Diags; + CIOpts.VFS = VFS; + + const char *Args[] = {"clang++", "-std=c++20", "-working-directory", + TestDir.c_str(), MainFilePath.c_str()}; + std::shared_ptr Invocation = + createInvocation(Args, CIOpts); + ASSERT_TRUE(Invocation); + + llvm::ErrorOr> ContentsBuffer = + llvm::MemoryBuffer::getFile(MainFilePath, /*IsText=*/true); + EXPECT_TRUE(ContentsBuffer); + std::unique_ptr Buffer = std::move(*ContentsBuffer); + + PreambleBounds Bounds = + ComputePreambleBounds(Invocation->getLangOpts(), *Buffer, 0); + + PreambleCallbacks Callbacks; + llvm::ErrorOr BuiltPreamble = PrecompiledPreamble::Build( + *Invocation, Buffer.get(), Bounds, *Diags, VFS, + std::make_shared(), + /*StoreInMemory=*/false, /*StoragePath=*/TestDir, Callbacks); + + ASSERT_FALSE(Diags->hasErrorOccurred()); + + EXPECT_TRUE(BuiltPreamble); + EXPECT_TRUE(BuiltPreamble->CanReuse(*Invocation, *Buffer, Bounds, *VFS)); + BuiltPreamble->OverridePreamble(*Invocation, VFS, Buffer.get()); + + auto Clang = std::make_unique( + std::make_shared()); + Clang->setInvocation(std::move(Invocation)); + Clang->setDiagnostics(Diags.get()); + + if (auto VFSWithRemapping = createVFSFromCompilerInvocation( + Clang->getInvocation(), Clang->getDiagnostics(), VFS)) + VFS = VFSWithRemapping; + + Clang->createFileManager(VFS); + EXPECT_TRUE(Clang->createTarget()); + + Buffer.release(); + + SyntaxOnlyAction Action; + EXPECT_TRUE(Clang->ExecuteAction(Action)); + EXPECT_FALSE(Clang->getDiagnosticsPtr()->hasErrorOccurred()); +} + +} // namespace