Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release/8.0-staging] Ensure that Sse3.MoveAndDuplicate correctly tracks supporting SIMD scalar loads #100417

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 28 additions & 9 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7652,26 +7652,47 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
}

case NI_SSE2_ConvertToVector128Double:
case NI_SSE3_MoveAndDuplicate:
case NI_AVX_ConvertToVector256Double:
case NI_AVX512F_ConvertToVector512Double:
case NI_AVX512F_VL_ConvertToVector128Double:
case NI_AVX512F_VL_ConvertToVector256Double:
{
assert(!supportsSIMDScalarLoads);

// Most instructions under the non-VEX encoding require aligned operands.
// Those used for Sse2.ConvertToVector128Double (CVTDQ2PD and CVTPS2PD)
// and Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't fail for
// unaligned inputs as they read mem64 (half the vector width) instead
// are exceptions and don't fail for unaligned inputs as they read half
// the vector width instead

supportsAlignedSIMDLoads = !comp->opts.MinOpts();
supportsUnalignedSIMDLoads = true;

const unsigned expectedSize = genTypeSize(parentNode->TypeGet()) / 2;
const unsigned operandSize = genTypeSize(childNode->TypeGet());

// For broadcasts we can only optimize constants and memory operands
const bool broadcastIsContainable = childNode->OperIsConst() || childNode->isMemoryOp();
supportsGeneralLoads =
broadcastIsContainable && supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
if (childNode->OperIsConst() || childNode->isMemoryOp())
{
// For broadcasts we can only optimize constants and memory operands
// since we're going from a smaller base type to a larger base type
supportsGeneralLoads = supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
}
break;
}

case NI_SSE3_MoveAndDuplicate:
{
// Most instructions under the non-VEX encoding require aligned operands.
// Those used for Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't
// fail for unaligned inputs as they read half the vector width instead

supportsAlignedSIMDLoads = !comp->opts.MinOpts();
supportsUnalignedSIMDLoads = true;

const unsigned expectedSize = genTypeSize(parentNode->TypeGet()) / 2;
const unsigned operandSize = genTypeSize(childNode->TypeGet());

supportsGeneralLoads = supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
supportsSIMDScalarLoads = true;
break;
}

Expand All @@ -7697,8 +7718,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
break;
}
}

assert(supportsSIMDScalarLoads == false);
break;
}

Expand Down
31 changes: 31 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_100404/Runtime_100404.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using Xunit;

// JIT regression test: multiplies a Vector128<double> by a vector created from a
// double field of a struct parameter and verifies both elements of the product.
// NOTE(review): the type name suggests this covers dotnet/runtime issue 100404 — confirm.
public static class Runtime_100404
{
// Test entry point. Marked NoInlining so the test body is not inlined into its caller.
[Fact]
[MethodImpl(MethodImplOptions.NoInlining)]
public static void TestMultiplyVector128DoubleByConstant()
{
// Every element of the first argument is 1.0 and point.X is 2.0, so each element
// of the product is expected to be exactly 2.0.
Vector128<double> result = Map(Vector128<double>.One, new FloatPoint(2.0, 3.0));
Assert.Equal(2.0, result[0]);
Assert.Equal(2.0, result[1]);
}

// Returns m0 multiplied element-wise by a vector whose elements are all point.X.
// NoInlining prevents this method from being inlined into the test above.
// NOTE(review): presumably this ensures point.X is read from the struct parameter
// rather than folded to a constant at the call site — confirm.
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector128<double> Map(Vector128<double> m0, FloatPoint point)
{
return m0 * Vector128.Create(point.X);
}

// Two-field value type used as the source of the scalar operand.
// Only X is read by the test; Y is initialized but never read.
// NOTE(review): Y presumably exists so that X is a field of a larger struct — confirm.
private struct FloatPoint(double x, double y)
{
public double X = x;
public double Y = y;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Build this project with compiler optimizations enabled. -->
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<!-- Compile the single source file that shares the project's name. -->
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>
Loading