Merged master:4ad459997e65 into amd-gfx:5efa00aba612

Local branch amd-gfx 5efa00a Merged master:552c6c232872 into amd-gfx:28482ff9f121 Remote branch master 4ad4599 [AArch64][GlobalISel] Select csinc if a select has a 1 on RHS.
jaebaek · Oct 16, 2020 · 97eda0c · 97eda0c
2 parents 5efa00a + 4ad4599
commit 97eda0c
Show file tree

Hide file tree

Showing 17 changed files with 477 additions and 7 deletions.
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -422,6 +422,10 @@ BUILTIN(__builtin_altivec_vextduwvrx, "V2ULLiV4UiV4UiUi", "")
 BUILTIN(__builtin_altivec_vextddvlx, "V2ULLiV2ULLiV2ULLiUi", "")
 BUILTIN(__builtin_altivec_vextddvrx, "V2ULLiV2ULLiV2ULLiUi", "")
 
+// P10 Vector rotate built-ins.
+BUILTIN(__builtin_altivec_vrlqmi, "V1ULLLiV1ULLLiV1ULLLiV1ULLLi", "")
+BUILTIN(__builtin_altivec_vrlqnm, "V1ULLLiV1ULLLiV1ULLLi", "")
+
 // VSX built-ins.
 
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")

diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -1372,11 +1372,109 @@ void AggExprEmitter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
   EmitNullInitializationToLValue(CGF.MakeAddrLValue(Slot.getAddress(), T));
 }
 
+/// Determine whether the given cast kind is known to always convert values
+/// with all zero bits in their value representation to values with all zero
+/// bits in their value representation.
+static bool castPreservesZero(const CastExpr *CE) {
+  switch (CE->getCastKind()) {
+    // No-ops.
+  case CK_NoOp:
+  case CK_UserDefinedConversion:
+  case CK_ConstructorConversion:
+  case CK_BitCast:
+  case CK_ToUnion:
+  case CK_ToVoid:
+    // Conversions between (possibly-complex) integral, (possibly-complex)
+    // floating-point, and bool.
+  case CK_BooleanToSignedIntegral:
+  case CK_FloatingCast:
+  case CK_FloatingComplexCast:
+  case CK_FloatingComplexToBoolean:
+  case CK_FloatingComplexToIntegralComplex:
+  case CK_FloatingComplexToReal:
+  case CK_FloatingRealToComplex:
+  case CK_FloatingToBoolean:
+  case CK_FloatingToIntegral:
+  case CK_IntegralCast:
+  case CK_IntegralComplexCast:
+  case CK_IntegralComplexToBoolean:
+  case CK_IntegralComplexToFloatingComplex:
+  case CK_IntegralComplexToReal:
+  case CK_IntegralRealToComplex:
+  case CK_IntegralToBoolean:
+  case CK_IntegralToFloating:
+    // Reinterpreting integers as pointers and vice versa.
+  case CK_IntegralToPointer:
+  case CK_PointerToIntegral:
+    // Language extensions.
+  case CK_VectorSplat:
+  case CK_NonAtomicToAtomic:
+  case CK_AtomicToNonAtomic:
+    return true;
+
+  case CK_BaseToDerivedMemberPointer:
+  case CK_DerivedToBaseMemberPointer:
+  case CK_MemberPointerToBoolean:
+  case CK_NullToMemberPointer:
+  case CK_ReinterpretMemberPointer:
+    // FIXME: ABI-dependent.
+    return false;
+
+  case CK_AnyPointerToBlockPointerCast:
+  case CK_BlockPointerToObjCPointerCast:
+  case CK_CPointerToObjCPointerCast:
+  case CK_ObjCObjectLValueCast:
+  case CK_IntToOCLSampler:
+  case CK_ZeroToOCLOpaqueType:
+    // FIXME: Check these.
+    return false;
+
+  case CK_FixedPointCast:
+  case CK_FixedPointToBoolean:
+  case CK_FixedPointToFloating:
+  case CK_FixedPointToIntegral:
+  case CK_FloatingToFixedPoint:
+  case CK_IntegralToFixedPoint:
+    // FIXME: Do all fixed-point types represent zero as all 0 bits?
+    return false;
+
+  case CK_AddressSpaceConversion:
+  case CK_BaseToDerived:
+  case CK_DerivedToBase:
+  case CK_Dynamic:
+  case CK_NullToPointer:
+  case CK_PointerToBoolean:
+    // FIXME: Preserves zeroes only if zero pointers and null pointers have the
+    // same representation in all involved address spaces.
+    return false;
+
+  case CK_ARCConsumeObject:
+  case CK_ARCExtendBlockObject:
+  case CK_ARCProduceObject:
+  case CK_ARCReclaimReturnedObject:
+  case CK_CopyAndAutoreleaseBlockObject:
+  case CK_ArrayToPointerDecay:
+  case CK_FunctionToPointerDecay:
+  case CK_BuiltinFnToFnPtr:
+  case CK_Dependent:
+  case CK_LValueBitCast:
+  case CK_LValueToRValue:
+  case CK_LValueToRValueBitCast:
+  case CK_UncheckedDerivedToBase:
+    return false;
+  }
+}
+
 /// isSimpleZero - If emitting this value will obviously just cause a store of
 /// zero to memory, return true.  This can return false if uncertain, so it just
 /// handles simple cases.
 static bool isSimpleZero(const Expr *E, CodeGenFunction &CGF) {
   E = E->IgnoreParens();
+  while (auto *CE = dyn_cast<CastExpr>(E)) {
+    if (!castPreservesZero(CE))
+      break;
+    E = CE->getSubExpr()->IgnoreParens();
+  }
 
   // 0
   if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E))

diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
@@ -7927,6 +7927,18 @@ vec_rl(vector unsigned long long __a, vector unsigned long long __b) {
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rl(vector signed __int128 __a, vector unsigned __int128 __b) {
+  return (__b << __a)|(__b >> ((__CHAR_BIT__ * sizeof(vector signed __int128)) - __a));
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return (__b << __a)|(__b >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) - __a));
+}
+#endif
+
 /* vec_rlmi */
 #ifdef __POWER9_VECTOR__
 static __inline__ vector unsigned int __ATTRS_o_ai
@@ -7940,8 +7952,24 @@ vec_rlmi(vector unsigned long long __a, vector unsigned long long __b,
          vector unsigned long long __c) {
   return __builtin_altivec_vrldmi(__a, __c, __b);
 }
+#endif
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rlmi(vector unsigned __int128 __a, vector unsigned __int128 __b,
+         vector unsigned __int128 __c) {
+  return __builtin_altivec_vrlqmi(__a, __c, __b);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rlmi(vector signed __int128 __a, vector signed __int128 __b,
+         vector signed __int128 __c) {
+  return __builtin_altivec_vrlqmi(__a, __c, __b);
+}
+#endif
 
 /* vec_rlnm */
+#ifdef __POWER9_VECTOR__
 static __inline__ vector unsigned int __ATTRS_o_ai
 vec_rlnm(vector unsigned int __a, vector unsigned int __b,
          vector unsigned int __c) {
@@ -7957,6 +7985,42 @@ vec_rlnm(vector unsigned long long __a, vector unsigned long long __b,
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rlnm(vector unsigned __int128 __a, vector unsigned __int128 __b,
+         vector unsigned __int128 __c) {
+  // Merge __b and __c using an appropriate shuffle.
+  vector unsigned char TmpB = (vector unsigned char)__b;
+  vector unsigned char TmpC = (vector unsigned char)__c;
+  vector unsigned char MaskAndShift =
+#ifdef __LITTLE_ENDIAN__
+      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 0,
+                              1, -1, -1, -1, -1, -1);
+#else
+      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 31, 30, 15, -1,
+                              -1, -1, -1, -1, -1, -1, -1);
+#endif
+   return __builtin_altivec_vrlqnm(__a, (vector unsigned __int128) MaskAndShift);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rlnm(vector signed __int128 __a, vector signed __int128 __b,
+         vector signed __int128 __c) {
+  // Merge __b and __c using an appropriate shuffle.
+  vector unsigned char TmpB = (vector unsigned char)__b;
+  vector unsigned char TmpC = (vector unsigned char)__c;
+  vector unsigned char MaskAndShift =
+#ifdef __LITTLE_ENDIAN__
+      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 0,
+                              1, -1, -1, -1, -1, -1);
+#else
+      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 31, 30, 15, -1,
+                              -1, -1, -1, -1, -1, -1, -1);
+#endif
+  return __builtin_altivec_vrlqnm(__a, (vector unsigned __int128) MaskAndShift);
+}
+#endif
+
 /* vec_vrlb */
 
 static __inline__ vector signed char __ATTRS_o_ai

diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -17,7 +17,7 @@ vector signed int vsia, vsib;
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
-vector signed __int128 vsi128a, vsi128b;
+vector signed __int128 vsi128a, vsi128b, vsi128c;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
@@ -1880,3 +1880,53 @@ int test_vec_all_ge_u128(void) {
   // CHECK-NEXT: ret i32
   return vec_all_ge(vui128a, vui128b);
 }
+
+vector signed __int128 test_vec_rl_s128(void) {
+  // CHECK-LABEL: @test_vec_rl_s128(
+  // CHECK: sub <1 x i128>
+  // CHECK-NEXT: lshr <1 x i128>
+  // CHECK-NEXT: or <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rl(vsi128a, vsi128b);
+}
+
+vector unsigned __int128 test_vec_rl_u128(void) {
+  // CHECK-LABEL: @test_vec_rl_u128(
+  // CHECK: sub <1 x i128>
+  // CHECK: lshr <1 x i128>
+  // CHECK: or <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rl(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_rlnm_s128(void) {
+  // CHECK-LABEL: @test_vec_rlnm_s128(
+  // CHECK-LE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK-BE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rlnm(vsi128a, vsi128b, vsi128c);
+}
+
+vector unsigned __int128 test_vec_rlnm_u128(void) {
+  // CHECK-LABEL: @test_vec_rlnm_u128(
+  // CHECK-LE:  %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK-BE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rlnm(vui128a, vui128b, vui128c);
+}
+
+vector signed __int128 test_vec_rlmi_s128(void) {
+  // CHECK-LABEL: @test_vec_rlmi_s128(
+  // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rlmi(vsi128a, vsi128b, vsi128c);
+}
+
+vector unsigned __int128 test_vec_rlmi_u128(void) {
+  // CHECK-LABEL: @test_vec_rlmi_u128(
+  // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rlmi(vui128a, vui128b, vui128c);
+}
diff --git a/clang/test/CodeGenCXX/cxx11-initializer-aggregate.cpp b/clang/test/CodeGenCXX/cxx11-initializer-aggregate.cpp
@@ -139,4 +139,35 @@ namespace ZeroInit {
   // CHECK-NOT }
   // CHECK: call {{.*}}memset
   Largeish largeish4() { return (Largeish){}; }
+  // CHECK: define {{.*}}@_ZN8ZeroInit9largeish5Ev(
+  // CHECK-NOT }
+  // CHECK: call {{.*}}memset
+  Largeish largeish5() { return {0, 0, 0}; }
+
+  typedef __attribute__((ext_vector_type(4))) char CI4;
+  struct Conversions {
+    _Complex int a;
+    _Complex float b;
+    short c;
+    long double d;
+    CI4 e;
+    char f;
+    char g;
+    int *h;
+    long i;
+  };
+  // CHECK: define {{.*}}@_ZN8ZeroInit11conversionsEv(
+  // CHECK-NOT }
+  // CHECK: call {{.*}}memset
+  Conversions conversions() {
+    return {0,
+            0,
+            0,
+            0,
+            CI4(0),
+            static_cast<char>(0.0),
+            char(0 + 0i),
+            reinterpret_cast<int *>(0),
+            reinterpret_cast<long>((int *)nullptr)};
+  }
 }
diff --git a/compiler-rt/test/asan/TestCases/strncasecmp_strict.c b/compiler-rt/test/asan/TestCases/strncasecmp_strict.c
@@ -24,7 +24,7 @@
 
 int main(int argc, char **argv) {
   assert(argc >= 2);
-  const size_t size = 100;
+  enum { size = 100 };
   char fill = 'o';
   char s1[size];
   char s2[size];

diff --git a/compiler-rt/test/asan/TestCases/strncmp_strict.c b/compiler-rt/test/asan/TestCases/strncmp_strict.c
@@ -22,7 +22,7 @@
 
 int main(int argc, char **argv) {
   assert(argc >= 2);
-  const size_t size = 100;
+  enum { size = 100 };
   char fill = 'o';
   char s1[size];
   char s2[size];

diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1057,6 +1057,15 @@ def int_ppc_altivec_vrldmi :
                             [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
                             [IntrNoMem]>;
 
+def int_ppc_altivec_vrlqnm :
+      PowerPC_Vec_Intrinsic<"vrlqnm", [llvm_v1i128_ty],
+                           [llvm_v1i128_ty, llvm_v1i128_ty],
+                            [IntrNoMem]>;
+def int_ppc_altivec_vrlqmi :
+      PowerPC_Vec_Intrinsic<"vrlqmi", [llvm_v1i128_ty],
+                            [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
+                            [IntrNoMem]>;
+
 // Vector Divide Extended Intrinsics.
 def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
 def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -4119,7 +4119,8 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
   MachineIRBuilder MIB(I);
   MachineRegisterInfo &MRI = *MIB.getMRI();
   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
-
+  Register SrcReg1 = I.getOperand(2).getReg();
+  Register SrcReg2 = I.getOperand(3).getReg();
   // We want to recognize this pattern:
   //
   // $z = G_FCMP pred, $x, $y
@@ -4208,6 +4209,31 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
   }
 
   // Emit the select.
+  // We may also be able to emit a CSINC if the RHS operand is a 1.
+  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg1, MRI, TRI);
+  auto ValAndVReg =
+      getConstantVRegValWithLookThrough(SrcReg2, MRI);
+
+  if (SrcRB.getID() == AArch64::GPRRegBankID && ValAndVReg &&
+      ValAndVReg->Value == 1) {
+    unsigned Size = MRI.getType(SrcReg1).getSizeInBits();
+    unsigned Opc = 0;
+    Register Zero;
+    if (Size == 64) {
+      Opc = AArch64::CSINCXr;
+      Zero = AArch64::XZR;
+    } else {
+      Opc = AArch64::CSINCWr;
+      Zero = AArch64::WZR;
+    }
+    auto CSINC =
+        MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {SrcReg1, Zero})
+            .addImm(CondCode);
+    constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
+    I.eraseFromParent();
+    return true;
+  }
+
   unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
   auto CSel =
       MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -892,6 +892,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
+      setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
     }
 
     setOperationAction(ISD::MUL, MVT::v8i16, Legal);