Skip to content

Commit

Permalink
June 2017
Browse files Browse the repository at this point in the history
  • Loading branch information
walbourn committed Jun 21, 2017
1 parent b83bff1 commit c592223
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Inc/DirectXMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#error DirectX Math requires C++
#endif

#define DIRECTX_MATH_VERSION 310
#define DIRECTX_MATH_VERSION 311

#if defined(_MSC_VER) && (_MSC_VER < 1800)
#error DirectX Math Visual C++ 2013 or later.
Expand Down
16 changes: 8 additions & 8 deletions Inc/DirectXMathMatrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX mResult;
// Splat the component X,Y,Z then W
#if defined(_XM_AVX_INTRINSICS_)
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
Expand All @@ -322,7 +322,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply
vX = _mm_add_ps(vX,vY);
mResult.r[0] = vX;
// Repeat for the other 3 rows
#if defined(_XM_AVX_INTRINSICS_)
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
Expand All @@ -342,7 +342,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply
vY = _mm_add_ps(vY,vW);
vX = _mm_add_ps(vX,vY);
mResult.r[1] = vX;
#if defined(_XM_AVX_INTRINSICS_)
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
Expand All @@ -362,7 +362,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiply
vY = _mm_add_ps(vY,vW);
vX = _mm_add_ps(vX,vY);
mResult.r[2] = vX;
#if defined(_XM_AVX_INTRINSICS_)
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
Expand Down Expand Up @@ -479,7 +479,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
return mResult;
#elif defined(_XM_SSE_INTRINSICS_)
// Splat the component X,Y,Z then W
#if defined(_XM_AVX_INTRINSICS_)
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
Expand All @@ -503,7 +503,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
vX = _mm_add_ps(vX,vY);
XMVECTOR r0 = vX;
// Repeat for the other 3 rows
#if defined(_XM_AVX_INTRINSICS_)
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
Expand All @@ -523,7 +523,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
vY = _mm_add_ps(vY,vW);
vX = _mm_add_ps(vX,vY);
XMVECTOR r1 = vX;
#if defined(_XM_AVX_INTRINSICS_)
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
Expand All @@ -543,7 +543,7 @@ inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
vY = _mm_add_ps(vY,vW);
vX = _mm_add_ps(vX,vY);
XMVECTOR r2 = vX;
#if defined(_XM_AVX_INTRINSICS_)
#if defined(_XM_AVX_INTRINSICS_) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
Expand Down
8 changes: 7 additions & 1 deletion ReadMe.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ DirectXMath

Copyright (c) Microsoft Corporation. All rights reserved.

January 2017
June 2017

This package contains the DirectXMath library, an all-inline SIMD C++ linear algebra library
for use in games and graphics apps.
Expand Down Expand Up @@ -59,6 +59,12 @@ https://opensource.microsoft.com/codeofconduct/
RELEASE HISTORY
---------------

June 2017 (3.11)
AVX optimization of XMMatrixMultiply and XMMatrixMultiplyTranspose
AVX2 optimization of XMVectorSplatX
FMA3 optimization of XMVectorMultiplyAdd and XMVectorNegativeMultiplySubtract (implied by /arch:AVX2)
Conformance fixes to support compilation with Clang 3.7

January 2017 (3.10)
Added XMVectorSum for horizontal adds
ARMv8 intrinsics use for ARM64 platform (division, rounding, half-precision conversion)
Expand Down

0 comments on commit c592223

Please sign in to comment.