flang: Operations with complex floating point numbers give unexpected results on MinGW #61976

mmuetzel · 2023-04-06T09:22:52Z

The following code compiled with Flang 16.0.0 installed from MSYS2 gives unexpected results:

program test
  implicit none
  complex :: z = cmplx(0., 1.)
  print*, z**2
end program

Result:

$ flang test-pow.f90 -o test-pow

$ ./test-pow
 (1.5440984E+23,0.)

$ ./test-pow
 (1.3455499E-28,0.)

$ ./test-pow
 (5.51239E-25,0.)

$ ./test-pow
 (Inf,0.)

It almost looks like random memory is dereferenced for the real part, and the imaginary part is always 0.

It is working correctly with GFortran:

$ gfortran test-pow.f90 -o test-pow

$ ./test-pow
            (-1.00000000,0.00000000)

If there is anything I can do to help debugging, please let me know.

mmuetzel · 2023-04-06T09:30:11Z

Assembly from flang -S test-pow.f90 -o test-pow.s:

	.text
	.def	@feat.00;
	.scl	3;
	.type	0;
	.endef
	.globl	@feat.00
.set @feat.00, 0
	.file	"FIRModule"
	.def	_QQmain;
	.scl	2;
	.type	32;
	.endef
	.globl	_QQmain
	.p2align	4, 0x90
_QQmain:
.seh_proc _QQmain
	subq	$104, %rsp
	.seh_stackalloc 104
	.seh_endprologue
	movl	$1065353216, 92(%rsp)
	movl	$0, 88(%rsp)
	movss	88(%rsp), %xmm0
	movss	92(%rsp), %xmm1
	movss	%xmm1, 84(%rsp)
	movss	%xmm0, 80(%rsp)
	movsd	80(%rsp), %xmm0
	movapd	%xmm0, 48(%rsp)
	leaq	48(%rsp), %rcx
	movl	$2, %edx
	callq	_FortranAcpowi
	movlpd	%xmm0, 72(%rsp)
	movss	72(%rsp), %xmm0
	movss	76(%rsp), %xmm1
	movss	%xmm1, 100(%rsp)
	movss	%xmm0, 96(%rsp)
	leaq	_QQcl.2E2F746573742D706F772E66393000(%rip), %rdx
	movl	$-1, %ecx
	movl	$6, %r8d
	callq	_FortranAioBeginExternalListOutput
	movq	%rax, %rcx
	movq	%rcx, 40(%rsp)
	movss	96(%rsp), %xmm1
	movss	100(%rsp), %xmm2
	callq	_FortranAioOutputComplex32
	movq	40(%rsp), %rcx
	callq	_FortranAioEndIoStatement
	nop
	addq	$104, %rsp
	retq
	.seh_endproc

	.section	.rdata,"dr"
	.weak	_QQcl.2E2F746573742D706F772E66393000
_QQcl.2E2F746573742D706F772E66393000:
	.asciz	"./test-pow.f90"

	.globl	_QQEnvironmentDefaults
	.p2align	3, 0x0
_QQEnvironmentDefaults:
	.quad	0

And LLVM IR with -emit-llvm:

; ModuleID = 'FIRModule'
source_filename = "FIRModule"
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-w64-windows-gnu"

@_QQcl.2E2F746573742D706F772E66393000 = linkonce constant [15 x i8] c"./test-pow.f90\00"
@_QQEnvironmentDefaults = constant ptr null

declare ptr @malloc(i64)

declare void @free(ptr)

define void @_QQmain() {
  %1 = alloca { float, float }, i64 1, align 8
  %2 = alloca { float, float }, i64 1, align 8
  store { float, float } { float 0.000000e+00, float 1.000000e+00 }, ptr %2, align 4
  %3 = load { float, float }, ptr %2, align 4
  %4 = alloca <2 x float>, i64 1, align 8
  store { float, float } %3, ptr %4, align 4
  %5 = load <2 x float>, ptr %4, align 8
  %6 = call <2 x float> @_FortranAcpowi(<2 x float> %5, i32 2)
  %7 = alloca <2 x float>, i64 1, align 8
  store <2 x float> %6, ptr %7, align 8
  %8 = load { float, float }, ptr %7, align 4
  store { float, float } %8, ptr %1, align 4
  %9 = call ptr @_FortranAioBeginExternalListOutput(i32 -1, ptr @_QQcl.2E2F746573742D706F772E66393000, i32 6)
  %10 = load { float, float }, ptr %1, align 4
  %11 = extractvalue { float, float } %10, 0
  %12 = extractvalue { float, float } %10, 1
  %13 = call i1 @_FortranAioOutputComplex32(ptr %9, float %11, float %12)
  %14 = call i32 @_FortranAioEndIoStatement(ptr %9)
  ret void
}

declare <2 x float> @_FortranAcpowi(<2 x float>, i32)

declare ptr @_FortranAioBeginExternalListOutput(i32, ptr, i32)

declare i1 @_FortranAioOutputComplex32(ptr, float, float)

declare i32 @_FortranAioEndIoStatement(ptr)

!llvm.module.flags = !{!0, !1}

!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !{i32 8, !"PIC Level", i32 2}

mmuetzel · 2023-04-06T10:16:45Z

Not sure if the following helps for comparison. (I hope I understood @mstorsjo's hints on IRC correctly):

struct cpx { float r, i; };

struct cpx _FortranAcpowi(struct cpx base, int exp);

int main(void)
{
  struct cpx c;
  c.r = 0;
  c.i = 1;
  struct cpx z = _FortranAcpowi(c, 2);
  return 0;
}

The assembly for clang -S -O2 test-pow.c -o test-pow-c.s:

	.text
	.def	@feat.00;
	.scl	3;
	.type	0;
	.endef
	.globl	@feat.00
.set @feat.00, 0
	.file	"test-pow.c"
	.def	main;
	.scl	2;
	.type	32;
	.endef
	.globl	main                            # -- Begin function main
	.p2align	4, 0x90
main:                                   # @main
.seh_proc main
# %bb.0:
	pushq	%rbp
	.seh_pushreg %rbp
	subq	$32, %rsp
	.seh_stackalloc 32
	leaq	32(%rsp), %rbp
	.seh_setframe %rbp, 32
	.seh_endprologue
	callq	__main
	movabsq	$4575657221408423936, %rcx      # imm = 0x3F80000000000000
	movl	$2, %edx
	callq	_FortranAcpowi
	xorl	%eax, %eax
	addq	$32, %rsp
	popq	%rbp
	retq
	.seh_endproc
                                        # -- End function
	.addrsig

mstorsjo · 2023-04-06T10:34:33Z

	movabsq	$4575657221408423936, %rcx      # imm = 0x3F80000000000000
	movl	$2, %edx
	callq	_FortranAcpowi

Ok, so in the case of C, at least with this definition of the struct, Clang expects to pass the struct of two floats packed into one GPR as the first argument. While in the case of the Fortran generated call above:

  movapd	%xmm0, 48(%rsp)
  leaq	48(%rsp), %rcx
  movl	$2, %edx
  callq	_FortranAcpowi

Here the complex struct is packed, stored on the stack and the address of it is passed as the first argument. I think this might be the mismatch at play here.

From the LLVM IR generated, in the Flang case, declare <2 x float> @_FortranAcpowi(<2 x float>, i32) is seems to represent the complex number as <2 x float> which I believe is a vector of 2 floats, while the C case generates declare dso_local i64 @_FortranAcpowi(i64, i32 noundef) where it's flattened to an i64.

So I believe this boils down to Flang not knowing what the C level ABI is for complex numbers on this particular platform and how it's represented.

mstorsjo · 2023-04-06T10:38:51Z

It looks like https://github.com/llvm/llvm-project/blob/llvmorg-16.0.1/flang/lib/Optimizer/CodeGen/Target.cpp#L134-L136 is the code that might be responsible for picking the right type here - it expects the linux type of ABI for all x86_64 targets here. https://github.com/llvm/llvm-project/blob/llvmorg-16.0.1/flang/lib/Optimizer/CodeGen/Target.cpp#L102-L104 looks like what we'd want it to be for x86_64 on Windows.

mmuetzel · 2023-04-06T11:23:13Z

Thank you very much for tracking that down.

I hope it is ok to extent that to the double precision case:

program test
  implicit none
  double complex :: x, z
  z = cmplx(0, 1.)
  x = z**2
  print*, x
  ! print*, pow(z,2)
end program

Result:

$ flang -O3 test-pow.f90 -o test-pow

$ ./test-pow
Segmentation fault

Generated assembly for flang -O2 -S test-pow-double.f90 -o test-pow-double.s:

	.text
	.def	@feat.00;
	.scl	3;
	.type	0;
	.endef
	.globl	@feat.00
.set @feat.00, 0
	.file	"FIRModule"
	.def	_QQmain;
	.scl	2;
	.type	32;
	.endef
	.section	.rdata,"dr"
	.p2align	3, 0x0
.LCPI0_0:
	.quad	0x3ff0000000000000
	.text
	.globl	_QQmain
	.p2align	4, 0x90
_QQmain:
.seh_proc _QQmain
	pushq	%rsi
	.seh_pushreg %rsi
	subq	$64, %rsp
	.seh_stackalloc 64
	movaps	%xmm7, 48(%rsp)
	.seh_savexmm %xmm7, 48
	movaps	%xmm6, 32(%rsp)
	.seh_savexmm %xmm6, 32
	.seh_endprologue
	movsd	.LCPI0_0(%rip), %xmm1
	xorps	%xmm0, %xmm0
	movl	$2, %r8d
	callq	_FortranAzpowi
	movaps	%xmm0, %xmm6
	movaps	%xmm1, %xmm7
	leaq	_QQcl.2E2F746573742D706F772E66393000(%rip), %rdx
	movl	$-1, %ecx
	movl	$6, %r8d
	callq	_FortranAioBeginExternalListOutput
	movq	%rax, %rsi
	movq	%rax, %rcx
	movaps	%xmm6, %xmm1
	movaps	%xmm7, %xmm2
	callq	_FortranAioOutputComplex64
	movq	%rsi, %rcx
	movaps	32(%rsp), %xmm6
	movaps	48(%rsp), %xmm7
	addq	$64, %rsp
	popq	%rsi
	jmp	_FortranAioEndIoStatement
	.seh_endproc

	.section	.rdata,"dr"
	.weak	_QQcl.2E2F746573742D706F772E66393000
_QQcl.2E2F746573742D706F772E66393000:
	.asciz	"./test-pow.f90"

	.globl	_QQEnvironmentDefaults
	.p2align	3, 0x0
_QQEnvironmentDefaults:
	.quad	0

Mock C example for comparison:

#include <stdio.h>

struct cpx { double r, i; };

struct cpx _FortranAzpowi(struct cpx base, int exp);

int main(void)
{
  struct cpx c;
  c.r = 0.;
  c.i = 1.;
  struct cpx z = _FortranAzpowi(c, 2);

  printf("(%f, %f)\n", z.r, z.i);

  return 0;
}

Result:

$ clang test-pow.c -o test-pow-c -lFortranRuntime

$ ./test-pow-c
(-1.000000, 0.000000)

Assembly for clang -O2 -S test-pow.c -o test-pow-c.s:

	.text
	.def	@feat.00;
	.scl	3;
	.type	0;
	.endef
	.globl	@feat.00
.set @feat.00, 0
	.file	"test-pow.c"
	.def	main;
	.scl	2;
	.type	32;
	.endef
	.section	.rdata,"dr"
	.p2align	3, 0x0                          # -- Begin function main
.LCPI0_0:
	.quad	0x3ff0000000000000              # double 1
	.text
	.globl	main
	.p2align	4, 0x90
main:                                   # @main
.seh_proc main
# %bb.0:
	pushq	%rbp
	.seh_pushreg %rbp
	subq	$64, %rsp
	.seh_stackalloc 64
	leaq	64(%rsp), %rbp
	.seh_setframe %rbp, 64
	.seh_endprologue
	callq	__main
	xorps	%xmm0, %xmm0
	movhps	.LCPI0_0(%rip), %xmm0           # xmm0 = xmm0[0,1],mem[0,1]
	movaps	%xmm0, -32(%rbp)
	leaq	-16(%rbp), %rcx
	leaq	-32(%rbp), %rdx
	movl	$2, %r8d
	callq	_FortranAzpowi
	movq	-16(%rbp), %xmm1                # xmm1 = mem[0],zero
	movq	-8(%rbp), %xmm2                 # xmm2 = mem[0],zero
	leaq	.L.str(%rip), %rcx
	movq	%xmm1, %rdx
	movq	%xmm2, %r8
	callq	printf
	xorl	%eax, %eax
	addq	$64, %rsp
	popq	%rbp
	retq
	.seh_endproc
                                        # -- End function
	.section	.rdata,"dr"
.L.str:                                 # @.str
	.asciz	"(%f, %f)\n"

	.addrsig

llvmbot · 2023-04-06T13:58:29Z

@llvm/issue-subscribers-flang-ir

mmuetzel · 2023-04-06T15:42:07Z

Thank you @mstorsjo for pointing out the fix.

The following change makes the (single precision) complex example work for me:

diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp
index 50d144909a1c..83b6261c1d34 100644
--- a/flang/lib/Optimizer/CodeGen/Target.cpp
+++ b/flang/lib/Optimizer/CodeGen/Target.cpp
@@ -232,6 +232,71 @@ struct TargetX86_64 : public GenericTarget<TargetX86_64> {
 };
 } // namespace
 
+//===----------------------------------------------------------------------===//
+// x86_64 (x86 64 bit) Windows target specifics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct TargetX86_64Win : public GenericTarget<TargetX86_64Win> {
+  using GenericTarget::GenericTarget;
+
+  static constexpr int defaultWidth = 64;
+
+  CodeGenSpecifics::Marshalling
+  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
+    CodeGenSpecifics::Marshalling marshal;
+    const auto *sem = &floatToSemantics(kindMap, eleTy);
+    if (sem == &llvm::APFloat::IEEEsingle()) {
+      // i64   pack both floats in a 64-bit GPR
+      marshal.emplace_back(mlir::IntegerType::get(eleTy.getContext(), 64),
+                           AT{});
+    } else if (sem == &llvm::APFloat::IEEEdouble()) {
+      // Use a type that will be translated into LLVM as:
+      // { double, double }   struct of 2 double
+      marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
+                                                mlir::TypeRange{eleTy, eleTy}),
+                           AT{});
+    } else if (sem == &llvm::APFloat::IEEEquad()) {
+      // Use a type that will be translated into LLVM as:
+      // { fp128, fp128 }   struct of 2 fp128, byval, align 16
+      marshal.emplace_back(
+          fir::ReferenceType::get(mlir::TupleType::get(
+              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
+          AT{/*align=*/16, /*byval=*/true});
+    } else {
+      TODO(loc, "complex for this precision");
+    }
+    return marshal;
+  }
+
+  CodeGenSpecifics::Marshalling
+  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
+    CodeGenSpecifics::Marshalling marshal;
+    const auto *sem = &floatToSemantics(kindMap, eleTy);
+    if (sem == &llvm::APFloat::IEEEsingle()) {
+      // <2 x t>   vector of 2 eleTy
+      marshal.emplace_back(fir::VectorType::get(2, eleTy), AT{});
+    } else if (sem == &llvm::APFloat::IEEEdouble()) {
+      // Use a type that will be translated into LLVM as:
+      // { double, double }   struct of 2 double
+      marshal.emplace_back(mlir::TupleType::get(eleTy.getContext(),
+                                                mlir::TypeRange{eleTy, eleTy}),
+                           AT{});
+    } else if (sem == &llvm::APFloat::IEEEquad()) {
+      // Use a type that will be translated into LLVM as:
+      // { fp128, fp128 }   struct of 2 fp128, sret, align 16
+      marshal.emplace_back(
+          fir::ReferenceType::get(mlir::TupleType::get(
+              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
+          AT{/*align=*/16, /*byval=*/false, /*sret=*/true});
+    } else {
+      TODO(loc, "complex for this precision");
+    }
+    return marshal;
+  }
+};
+} // namespace
+
 //===----------------------------------------------------------------------===//
 // AArch64 linux target specifics.
 //===----------------------------------------------------------------------===//
@@ -548,8 +613,12 @@ fir::CodeGenSpecifics::get(mlir::MLIRContext *ctx, llvm::Triple &&trp,
     return std::make_unique<TargetI386>(ctx, std::move(trp),
                                         std::move(kindMap));
   case llvm::Triple::ArchType::x86_64:
-    return std::make_unique<TargetX86_64>(ctx, std::move(trp),
-                                          std::move(kindMap));
+    if (trp.isOSWindows())
+      return std::make_unique<TargetX86_64Win>(ctx, std::move(trp),
+                                               std::move(kindMap));
+    else
+      return std::make_unique<TargetX86_64>(ctx, std::move(trp),
+                                            std::move(kindMap));
   case llvm::Triple::ArchType::aarch64:
     return std::make_unique<TargetAArch64>(ctx, std::move(trp),
                                            std::move(kindMap));

It still crashes for double complex though.
I don't know what the correct conversion for that is.

mmuetzel · 2023-04-06T16:39:12Z

Got it to work for both float and double complex. The LLVM IR for both function arguments and return values looks equivalent to the C mock examples:

From dc8830b6d616c03b41b287b3c34065a0592a566e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Markus=20M=C3=BCtzel?= <markus.muetzel@gmx.de>
Date: Thu, 6 Apr 2023 18:36:50 +0200
Subject: [PATCH] [flang][codegen]: Complex numbers in function arguments on
 Windows.

Function arguments or return values that are complex floating point values
aren't correctly lowered for Windows 64-bit targets.
Add a target that is specific for that platform.
---
 flang/lib/Optimizer/CodeGen/Target.cpp | 76 +++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp
index 50d144909a1c..e3f6757ac573 100644
--- a/flang/lib/Optimizer/CodeGen/Target.cpp
+++ b/flang/lib/Optimizer/CodeGen/Target.cpp
@@ -232,6 +232,74 @@ struct TargetX86_64 : public GenericTarget<TargetX86_64> {
 };
 } // namespace
 
+//===----------------------------------------------------------------------===//
+// x86_64 (x86 64 bit) Windows target specifics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct TargetX86_64Win : public GenericTarget<TargetX86_64Win> {
+  using GenericTarget::GenericTarget;
+
+  static constexpr int defaultWidth = 64;
+
+  CodeGenSpecifics::Marshalling
+  complexArgumentType(mlir::Location loc, mlir::Type eleTy) const override {
+    CodeGenSpecifics::Marshalling marshal;
+    const auto *sem = &floatToSemantics(kindMap, eleTy);
+    if (sem == &llvm::APFloat::IEEEsingle()) {
+      // i64   pack both floats in a 64-bit GPR
+      marshal.emplace_back(mlir::IntegerType::get(eleTy.getContext(), 64),
+                           AT{});
+    } else if (sem == &llvm::APFloat::IEEEdouble()) {
+      // Use a type that will be translated into LLVM as:
+      // { double, double }   struct of 2 double, byval, align 8
+      marshal.emplace_back(
+          fir::ReferenceType::get(mlir::TupleType::get(
+              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
+          AT{/*align=*/8, /*byval=*/true});
+    } else if (sem == &llvm::APFloat::IEEEquad()) {
+      // Use a type that will be translated into LLVM as:
+      // { fp128, fp128 }   struct of 2 fp128, byval, align 16
+      marshal.emplace_back(
+          fir::ReferenceType::get(mlir::TupleType::get(
+              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
+          AT{/*align=*/16, /*byval=*/true});
+    } else {
+      TODO(loc, "complex for this precision");
+    }
+    return marshal;
+  }
+
+  CodeGenSpecifics::Marshalling
+  complexReturnType(mlir::Location loc, mlir::Type eleTy) const override {
+    CodeGenSpecifics::Marshalling marshal;
+    const auto *sem = &floatToSemantics(kindMap, eleTy);
+    if (sem == &llvm::APFloat::IEEEsingle()) {
+      // i64   pack both floats in a 64-bit GPR
+      marshal.emplace_back(mlir::IntegerType::get(eleTy.getContext(), 64),
+                           AT{});
+    } else if (sem == &llvm::APFloat::IEEEdouble()) {
+      // Use a type that will be translated into LLVM as:
+      // { double, double }   struct of 2 double, sret, align 8
+      marshal.emplace_back(
+          fir::ReferenceType::get(mlir::TupleType::get(
+              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
+          AT{/*align=*/8, /*byval=*/false, /*sret=*/true});
+    } else if (sem == &llvm::APFloat::IEEEquad()) {
+      // Use a type that will be translated into LLVM as:
+      // { fp128, fp128 }   struct of 2 fp128, sret, align 16
+      marshal.emplace_back(
+          fir::ReferenceType::get(mlir::TupleType::get(
+              eleTy.getContext(), mlir::TypeRange{eleTy, eleTy})),
+          AT{/*align=*/16, /*byval=*/false, /*sret=*/true});
+    } else {
+      TODO(loc, "complex for this precision");
+    }
+    return marshal;
+  }
+};
+} // namespace
+
 //===----------------------------------------------------------------------===//
 // AArch64 linux target specifics.
 //===----------------------------------------------------------------------===//
@@ -548,8 +616,12 @@ fir::CodeGenSpecifics::get(mlir::MLIRContext *ctx, llvm::Triple &&trp,
     return std::make_unique<TargetI386>(ctx, std::move(trp),
                                         std::move(kindMap));
   case llvm::Triple::ArchType::x86_64:
-    return std::make_unique<TargetX86_64>(ctx, std::move(trp),
-                                          std::move(kindMap));
+    if (trp.isOSWindows())
+      return std::make_unique<TargetX86_64Win>(ctx, std::move(trp),
+                                               std::move(kindMap));
+    else
+      return std::make_unique<TargetX86_64>(ctx, std::move(trp),
+                                            std::move(kindMap));
   case llvm::Triple::ArchType::aarch64:
     return std::make_unique<TargetAArch64>(ctx, std::move(trp),
                                            std::move(kindMap));
-- 
2.38.0.windows.1

Can I do something more to help?

kiranchandramohan · 2023-04-06T19:02:06Z

Can I do something more to help?

Can you create a patch? https://llvm.org/docs/Phabricator.html

MehdiChinoune · 2023-04-06T19:06:13Z

One question, When Flang developers requested to join LLVM project years ago, they promised to add support for Windows.
We are not seeing anything like that (Windows support was added by volunteers)

kiranchandramohan · 2023-04-06T19:35:52Z

One question, When Flang developers requested to join LLVM project years ago, they promised to add support for Windows. We are not seeing anything like that (Windows support was added by volunteers)

All Flang developers are volunteers. Flang is an open-source project and everyone is welcome to contribute. If Windows enablement is the most important feature for you then you should join the development effort.

Now, since you have asked, we have been supporting Windows as much as possible. What has been done so far:
-> Every patch that goes for review goes through a Windows pre-commit CI. Many Windows fixes were made as part of this effort to switch on the CI. For eg: https://buildkite.com/llvm-project/premerge-checks/builds/145158#0187521f-04a2-4d89-9c2b-f4a6767848ba
-> Post commit windows CI https://lab.llvm.org/buildbot/#/builders/172, https://lab.llvm.org/buildbot/#/builders/65 (named clang but builds and checks flang as well). Enablement of these CIs required some fixes.
-> All tests were moved from using shell scripts to python. During the course of porting the tests, some fixes were made.
-> Switching to using LLVM Filesystem API helped windows.
-> Driver supports windows (issues are there) since we are using the Clang Driver.

The existence of Windows CI ensures that whatever changes are being made are tested on the Windows platform. I think all these (and more that I have missed) more than satisfies any promise that was made.

MehdiChinoune · 2023-04-06T21:04:06Z

All Flang developers are volunteers.

I mean by volunteers everyone who contribute for free, Most of Flang contributors are paid developers (including you from AMD, if I am not mistaking)

Windows CI was added after Windows support was added by volunteers, and the developers who added that CI (I think from Linaro) are not the one who promised Windows support.
I was the one who asked to enable Flang on Windows CI google/llvm-premerge-checks#287 after I have fixed the remaining issue at the time https://reviews.llvm.org/D96069

kiranchandramohan · 2023-04-06T23:13:18Z

All Flang developers are volunteers.

I mean by volunteers everyone who contribute for free, Most of Flang contributors are paid developers (including you from AMD, if I am not mistaking)

I work for Arm. If people are paid, they are also given priorities. For some it might be fixing all issues seen in tests, for some it might be enabling GPU offloading, for some it might be performance.

Windows CI was added after Windows support was added by volunteers, and the developers who added that CI (I think from Linaro) are not the one who promised Windows support.

I don't know who made the promise. Having Windows support is a good thing. But there are a lot of people who have helped in adding support for Windows including developers at companies. Also writing the code in a reasonably portable way in the first place also helped to port. We should not forget all that effort.

I was the one who asked to enable Flang on Windows CI google/llvm-premerge-checks#287 after I have fixed the remaining issue at the time https://reviews.llvm.org/D96069

Thanks for that. At the same time, we should not forget the effort of setting up the post-commit CI, enabling tests for Windows, and fixing any Windows issues seen while submitting patches. Sometimes this has not been easy without access to a local Windows machine.

mmuetzel · 2023-04-07T07:30:47Z

I uploaded a patch to Phabricator here: https://reviews.llvm.org/D147768

hmartinez82 · 2023-04-15T11:15:11Z

What about aarch64 on Windows?

mmuetzel · 2023-04-15T11:21:24Z

What about aarch64 on Windows?

Apparently, that is working for you, isn't it?

hmartinez82 · 2023-04-15T12:18:30Z

It is for Octave. Just hoping that fixing for x86_64 won't break aarch64 :)

MehdiChinoune · 2023-04-15T12:24:53Z

It is for Octave. Just hoping that fixing for x86_64 won't break aarch64 :)

mingw-w64-flang is already patched.

mmuetzel · 2023-04-15T12:32:46Z

It is for Octave. Just hoping that fixing for x86_64 won't break aarch64 :)

Those are different targets. Making changes for one doesn't affect the other.

@mstorsjo

Function arguments or return values that are complex floating point values aren't correctly lowered for Windows x86 32-bit and 64-bit targets. See: #61976 Add targets that are specific for these platforms and OS. With thanks to @mstorsjo for pointing out the fix. Reviewed By: vzakhari Differential Revision: https://reviews.llvm.org/D147768

mmuetzel · 2023-04-18T16:21:22Z

Should be fixed with this change: 774703e

@mstorsjo

Function arguments or return values that are complex floating point values aren't correctly lowered for Windows x86 32-bit and 64-bit targets. See: llvm/llvm-project#61976 Add targets that are specific for these platforms and OS. With thanks to @mstorsjo for pointing out the fix. Reviewed By: vzakhari Differential Revision: https://reviews.llvm.org/D147768 (cherry picked from commit 774703e)

@mstorsjo

Function arguments or return values that are complex floating point values aren't correctly lowered for Windows x86 32-bit and 64-bit targets. See: llvm/llvm-project#61976 Add targets that are specific for these platforms and OS. With thanks to @mstorsjo for pointing out the fix. Reviewed By: vzakhari Differential Revision: https://reviews.llvm.org/D147768 (cherry picked from commit 774703e)

@mstorsjo

Function arguments or return values that are complex floating point values aren't correctly lowered for Windows x86 32-bit and 64-bit targets. See: llvm#61976 Add targets that are specific for these platforms and OS. With thanks to @mstorsjo for pointing out the fix. Reviewed By: vzakhari Differential Revision: https://reviews.llvm.org/D147768

@mstorsjo

Function arguments or return values that are complex floating point values aren't correctly lowered for Windows x86 32-bit and 64-bit targets. See: llvm#61976 Add targets that are specific for these platforms and OS. With thanks to @mstorsjo for pointing out the fix. Reviewed By: vzakhari Differential Revision: https://reviews.llvm.org/D147768 (cherry picked from commit 774703e)

badger200 · 2023-08-13T04:43:24Z

@mmuetzel I am having this same problem on iOS AArch64 Darwin (I’m jailbroken but this would also apply to M1 Silicon Macs).

I compiled your Fortran and C examples with flang 16.0.2 (which I compiled myself):

23:32:30 iPad-Pro:/src/OpenBLAS-0.3.23 root$ ./testc.c-clang-16-libFortranRuntime-16-executable 
(-1.000000, 0.000000)
23:37:05 iPad-Pro:/src/OpenBLAS-0.3.23 root$ ./test.o-flang-16-missing-FortranAcpowi-executable
 (-1.,0.)
23:37:26 iPad-Pro:/src/OpenBLAS-0.3.23 root$ ./test-lfortran 
(-1.000000,-0.000000)

The middle result is the problem. As you can see, the C version compiled with clang-16.0.2 and the LFortran 0.19.0 version both give the correct result.

Will the solution be to make a Flang Target patch for AArch64 like you did for Windows x86-64? Or does this indicate a larger issue?

mmuetzel · 2023-08-13T05:00:17Z

@badger200: I don't see how this is the same problem. Afaict, the three results you showed seem to be equivalent. The only difference seems to be how many significant digits are displayed.
If you think there is a problem in the text output of complex numbers with either Clang or Flang, it might make sense to open a specific issue if there is none yet.

@mstorsjo

Function arguments or return values that are complex floating point values aren't correctly lowered for Windows x86 32-bit and 64-bit targets. See: llvm#61976 Add targets that are specific for these platforms and OS. With thanks to @mstorsjo for pointing out the fix. Reviewed By: vzakhari Differential Revision: https://reviews.llvm.org/D147768 (cherry picked from commit 774703e)

@mstorsjo

Function arguments or return values that are complex floating point values aren't correctly lowered for Windows x86 32-bit and 64-bit targets. See: llvm/llvm-project#61976 Add targets that are specific for these platforms and OS. With thanks to @mstorsjo for pointing out the fix. Reviewed By: vzakhari Differential Revision: https://reviews.llvm.org/D147768

github-actions bot added the new issue label Apr 6, 2023

EugeneZelenko added flang:ir and removed new issue labels Apr 6, 2023

MehdiChinoune mentioned this issue Apr 6, 2023

flang: Fix complex power. msys2/MINGW-packages#16700

Closed

mmuetzel mentioned this issue Apr 15, 2023

octave: build with LLVM compilers and update to 8.2.0 msys2/MINGW-packages#16156

Merged

mmuetzel closed this as completed Apr 18, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

flang: Operations with complex floating point numbers give unexpected results on MinGW #61976

flang: Operations with complex floating point numbers give unexpected results on MinGW #61976

mmuetzel commented Apr 6, 2023 •

edited by VoltrexKeyva

Loading

mmuetzel commented Apr 6, 2023 •

edited by VoltrexKeyva

Loading

mmuetzel commented Apr 6, 2023 •

edited by VoltrexKeyva

Loading

mstorsjo commented Apr 6, 2023 •

edited by VoltrexKeyva

Loading

mstorsjo commented Apr 6, 2023

mmuetzel commented Apr 6, 2023

llvmbot commented Apr 6, 2023

mmuetzel commented Apr 6, 2023 •

edited

Loading

mmuetzel commented Apr 6, 2023

kiranchandramohan commented Apr 6, 2023

MehdiChinoune commented Apr 6, 2023

kiranchandramohan commented Apr 6, 2023

MehdiChinoune commented Apr 6, 2023 •

edited

Loading

kiranchandramohan commented Apr 6, 2023 •

edited

Loading

mmuetzel commented Apr 7, 2023 •

edited

Loading

hmartinez82 commented Apr 15, 2023

mmuetzel commented Apr 15, 2023

hmartinez82 commented Apr 15, 2023

MehdiChinoune commented Apr 15, 2023

mmuetzel commented Apr 15, 2023

mmuetzel commented Apr 18, 2023

badger200 commented Aug 13, 2023 •

edited

Loading

mmuetzel commented Aug 13, 2023

flang: Operations with complex floating point numbers give unexpected results on MinGW #61976

flang: Operations with complex floating point numbers give unexpected results on MinGW #61976

Comments

mmuetzel commented Apr 6, 2023 • edited by VoltrexKeyva Loading

mmuetzel commented Apr 6, 2023 • edited by VoltrexKeyva Loading

mmuetzel commented Apr 6, 2023 • edited by VoltrexKeyva Loading

mstorsjo commented Apr 6, 2023 • edited by VoltrexKeyva Loading

mstorsjo commented Apr 6, 2023

mmuetzel commented Apr 6, 2023

llvmbot commented Apr 6, 2023

mmuetzel commented Apr 6, 2023 • edited Loading

mmuetzel commented Apr 6, 2023

kiranchandramohan commented Apr 6, 2023

MehdiChinoune commented Apr 6, 2023

kiranchandramohan commented Apr 6, 2023

MehdiChinoune commented Apr 6, 2023 • edited Loading

kiranchandramohan commented Apr 6, 2023 • edited Loading

mmuetzel commented Apr 7, 2023 • edited Loading

hmartinez82 commented Apr 15, 2023

mmuetzel commented Apr 15, 2023

hmartinez82 commented Apr 15, 2023

MehdiChinoune commented Apr 15, 2023

mmuetzel commented Apr 15, 2023

mmuetzel commented Apr 18, 2023

badger200 commented Aug 13, 2023 • edited Loading

mmuetzel commented Aug 13, 2023

mmuetzel commented Apr 6, 2023 •

edited by VoltrexKeyva

Loading

mmuetzel commented Apr 6, 2023 •

edited by VoltrexKeyva

Loading

mmuetzel commented Apr 6, 2023 •

edited by VoltrexKeyva

Loading

mstorsjo commented Apr 6, 2023 •

edited by VoltrexKeyva

Loading

mmuetzel commented Apr 6, 2023 •

edited

Loading

MehdiChinoune commented Apr 6, 2023 •

edited

Loading

kiranchandramohan commented Apr 6, 2023 •

edited

Loading

mmuetzel commented Apr 7, 2023 •

edited

Loading

badger200 commented Aug 13, 2023 •

edited

Loading