[flang][debug] Improve handling of cyclic derived types. (#122770)

When `RecordType` is converted to corresponding `DIType`, we cache the information to avoid doing the conversion again. Our conversion of `RecordType` looks like this: `ConvertRecordType(RecordType Ty)` 1. If type `Ty` is already in the cache, then return the corresponding item. 2. Create a place holder `DICompositeTypeAttr` (called `ty_self` below) for `Ty` 3. Put `Ty->ty_self` in the cache 4. Convert members of `Ty`. This may cause `ConvertRecordType` to be called again with other types. 5. Create final `DICompositeTypeAttr` 6. Replace the `ty_self` in the cache with one created in step 5 end The purpose of creating `ty_self` is to handle cases where a member may have reference to parent type. Now consider the code below: ``` type t1 type(t2), pointer :: p1 end type type t2 type(t1), pointer :: p2 end type ``` While processing t1, we could have a structure like below. `t1 -> t2 -> t1_self` The `t2` created during handling of `t1` cant be cached on its own as it contains a place holder reference. It will fail an assert in MLIR if it is processed standalone. To avoid this problem, we have a check in the step 6 above to not cache such types. But this check was not tight enough. It just checked if a type should not have a place holder reference to another type. It missed the following case where the place holder reference can be in a type further down the line. ``` type t1 type(t2), pointer :: p1 end type type t2 type(t3), pointer :: p2 end type type t3 type(t1), pointer :: p3 end type ``` So while processing `t1`, we have to stop caching of not only `t3` but also of `t2`. This PR improves the check and moves the logic inside `convertRecordType`. Please note that this limitation of why a type cant have a placeholder reference is because of how such references are resolved in the mlir. Please see the discussion at the end of this [PR](#106571). I have to change `getDerivedType` so that it will also get the derived type for things like `type(t2), pointer :: p1` which are wrapped in `BoxType`. Happy to move it to a new function or a local helper in case this change is problematic. Fixes #122024.
llvm · Jan 20, 2025 · af91372 · af91372
1 parent a79ae86
commit af91372
Show file tree

Hide file tree

Showing 3 changed files with 91 additions and 50 deletions.
diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp
@@ -210,6 +210,7 @@ mlir::Type getDerivedType(mlir::Type ty) {
           return seq.getEleTy();
         return p.getEleTy();
       })
+      .Case<fir::BoxType>([](auto p) { return getDerivedType(p.getEleTy()); })
       .Default([](mlir::Type t) { return t; });
 }
 

diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
@@ -273,55 +273,6 @@ static mlir::LLVM::DITypeAttr getUnderlyingType(mlir::LLVM::DITypeAttr Ty) {
   return Ty;
 }
 
-// Currently, the handling of recursive debug type in mlir has some limitations.
-// Those limitations were discussed at the end of the thread for following PR.
-// https://github.com/llvm/llvm-project/pull/106571
-//
-// Problem could be explained with the following example code:
-//  type t2
-//   type(t1), pointer :: p1
-// end type
-// type t1
-//   type(t2), pointer :: p2
-// end type
-// In the description below, type_self means a temporary type that is generated
-// as a place holder while the members of that type are being processed.
-//
-// If we process t1 first then we will have the following structure after it has
-// been processed.
-// t1 -> t2 -> t1_self
-// This is because when we started processing t2, we did not have the complete
-// t1 but its place holder t1_self.
-// Now if some entity requires t2, we will already have that in cache and will
-// return it. But this t2 refers to t1_self and not to t1. In mlir handling,
-// only those types are allowed to have _self reference which are wrapped by
-// entity whose reference it is. So t1 -> t2 -> t1_self is ok because the
-// t1_self reference can be resolved by the outer t1. But standalone t2 is not
-// because there will be no way to resolve it. Until this is fixed in mlir, we
-// avoid caching such types. Please see DebugTranslation::translateRecursive for
-// details on how mlir handles recursive types.
-static bool canCacheThisType(mlir::LLVM::DICompositeTypeAttr comTy) {
-  for (auto el : comTy.getElements()) {
-    if (auto mem =
-            mlir::dyn_cast_if_present<mlir::LLVM::DIDerivedTypeAttr>(el)) {
-      mlir::LLVM::DITypeAttr memTy = getUnderlyingType(mem.getBaseType());
-      if (auto baseTy =
-              mlir::dyn_cast_if_present<mlir::LLVM::DICompositeTypeAttr>(
-                  memTy)) {
-        // We will not cache a type if one of its member meets the following
-        // conditions:
-        // 1. It is a structure type
-        // 2. It is a place holder type (getIsRecSelf() is true)
-        // 3. It is not a self reference. It is ok to have t1_self in t1.
-        if (baseTy.getTag() == llvm::dwarf::DW_TAG_structure_type &&
-            baseTy.getIsRecSelf() && (comTy.getRecId() != baseTy.getRecId()))
-          return false;
-      }
-    }
-  }
-  return true;
-}
-
 std::pair<std::uint64_t, unsigned short>
 DebugTypeGenerator::getFieldSizeAndAlign(mlir::Type fieldTy) {
   mlir::Type llvmTy;
@@ -343,6 +294,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType(
   if (iter != typeCache.end())
     return iter->second;
 
+  bool canCacheThisType = true;
   llvm::SmallVector<mlir::LLVM::DINodeAttr> elements;
   mlir::MLIRContext *context = module.getContext();
   auto recId = mlir::DistinctAttr::create(mlir::UnitAttr::get(context));
@@ -406,6 +358,62 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType(
         /*extra data=*/nullptr);
     elements.push_back(tyAttr);
     offset += llvm::alignTo(byteSize, byteAlign);
+
+    // Currently, the handling of recursive debug type in mlir has some
+    // limitations that were discussed at the end of the thread for following
+    // PR.
+    // https://github.com/llvm/llvm-project/pull/106571
+    //
+    // Problem could be explained with the following example code:
+    //  type t2
+    //   type(t1), pointer :: p1
+    // end type
+    // type t1
+    //   type(t2), pointer :: p2
+    // end type
+    // In the description below, type_self means a temporary type that is
+    // generated
+    // as a place holder while the members of that type are being processed.
+    //
+    // If we process t1 first then we will have the following structure after
+    // it has been processed.
+    // t1 -> t2 -> t1_self
+    // This is because when we started processing t2, we did not have the
+    // complete t1 but its place holder t1_self.
+    // Now if some entity requires t2, we will already have that in cache and
+    // will return it. But this t2 refers to t1_self and not to t1. In mlir
+    // handling, only those types are allowed to have _self reference which are
+    // wrapped by entity whose reference it is. So t1 -> t2 -> t1_self is ok
+    // because the t1_self reference can be resolved by the outer t1. But
+    // standalone t2 is not because there will be no way to resolve it. Until
+    // this is fixed in mlir, we avoid caching such types. Please see
+    // DebugTranslation::translateRecursive for details on how mlir handles
+    // recursive types.
+    // The code below checks for situation where it will be unsafe to cache
+    // a type to avoid this problem. We do that in 2 situations.
+    // 1. If a member is record type, then its type would have been processed
+    // before reaching here. If it is not in the cache, it means that it was
+    // found to be unsafe to cache. So any type containing it will also not
+    // be cached
+    // 2. The type of the member is found in the cache but it is a place holder.
+    // In this case, its recID should match the recID of the type we are
+    // processing. This helps us to cache the following type.
+    // type t
+    //  type(t), allocatable :: p
+    // end type
+    mlir::Type baseTy = getDerivedType(fieldTy);
+    if (auto recTy = mlir::dyn_cast<fir::RecordType>(baseTy)) {
+      auto iter = typeCache.find(recTy);
+      if (iter == typeCache.end())
+        canCacheThisType = false;
+      else {
+        if (auto tyAttr =
+                mlir::dyn_cast<mlir::LLVM::DICompositeTypeAttr>(iter->second)) {
+          if (tyAttr.getIsRecSelf() && tyAttr.getRecId() != recId)
+            canCacheThisType = false;
+        }
+      }
+    }
   }
 
   auto finalAttr = mlir::LLVM::DICompositeTypeAttr::get(
@@ -414,7 +422,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType(
       /*baseType=*/nullptr, mlir::LLVM::DIFlags::Zero, offset * 8,
       /*alignInBits=*/0, elements, /*dataLocation=*/nullptr, /*rank=*/nullptr,
       /*allocated=*/nullptr, /*associated=*/nullptr);
-  if (canCacheThisType(finalAttr)) {
+  if (canCacheThisType) {
     typeCache[Ty] = finalAttr;
   } else {
     auto iter = typeCache.find(Ty);

diff --git a/flang/test/Integration/debug-cyclic-derived-type-3.f90 b/flang/test/Integration/debug-cyclic-derived-type-3.f90
@@ -0,0 +1,32 @@
+! RUN: %flang_fc1 -emit-llvm -debug-info-kind=standalone %s -o -
+
+! mainly test that this program does not cause an assertion failure
+! testcase for issue 122024
+
+module m1
+  type t1
+    type(t2),pointer :: x1
+  end type
+  type t2
+    type(t3),pointer :: x2
+  end type
+  type t3
+    type(t1),pointer :: x3
+  end type
+end
+
+program test
+  use m1
+  type(t1),pointer :: foo
+  allocate(foo)
+  allocate(foo%x1)
+  allocate(foo%x1%x2)
+  allocate(foo%x1%x2%x3)
+  call sub1(foo%x1)
+  print *,'done'
+end program
+
+subroutine sub1(bar)
+  use m1
+  type(t2) :: bar
+end subroutine