Skip to content
This repository has been archived by the owner on Nov 1, 2020. It is now read-only.

Commit

Permalink
Vectorize and use ROSpan.LastIndexOf as the workhorse for string.Last…
Browse files Browse the repository at this point in the history
…IndexOf (#17370)

* Vectorize and use ROSpan.LastIndexOf as the workhorse for string.LastIndexOf

* Address PR feedback, remove Length == 0 checks where unnecessary.

* Use aligned vector read just like IndexOf

Signed-off-by: dotnet-bot <dotnet-bot@microsoft.com>
  • Loading branch information
ahsonkhan committed Apr 6, 2018
1 parent 7921533 commit 18dee6b
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 112 deletions.
10 changes: 10 additions & 0 deletions src/System.Private.CoreLib/shared/System/MemoryExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,11 @@ public static int LastIndexOf<T>(this Span<T> span, T value)
ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(span)),
Unsafe.As<T, byte>(ref value),
span.Length);
if (typeof(T) == typeof(char))
return SpanHelpers.LastIndexOf(
ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
Unsafe.As<T, char>(ref value),
span.Length);

return SpanHelpers.LastIndexOf<T>(ref MemoryMarshal.GetReference(span), value, span.Length);
}
Expand Down Expand Up @@ -365,6 +370,11 @@ public static int LastIndexOf<T>(this ReadOnlySpan<T> span, T value)
ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(span)),
Unsafe.As<T, byte>(ref value),
span.Length);
if (typeof(T) == typeof(char))
return SpanHelpers.LastIndexOf(
ref Unsafe.As<T, char>(ref MemoryMarshal.GetReference(span)),
Unsafe.As<T, char>(ref value),
span.Length);

return SpanHelpers.LastIndexOf<T>(ref MemoryMarshal.GetReference(span), value, span.Length);
}
Expand Down
122 changes: 121 additions & 1 deletion src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ public static unsafe int SequenceCompareTo(ref char first, int firstLength, ref
while ((byte*)i < (byte*)minLength)
{
int result = Unsafe.Add(ref first, i).CompareTo(Unsafe.Add(ref second, i));
if (result != 0) return result;
if (result != 0)
return result;
i += 1;
}

Expand Down Expand Up @@ -167,6 +168,91 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length)
}
}

public static unsafe int LastIndexOf(ref char searchSpace, char value, int length)
{
Debug.Assert(length >= 0);

fixed (char* pChars = &searchSpace)
{
char* pCh = pChars + length;
char* pEndCh = pChars;

#if !netstandard11
if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
{
const int elementsPerByte = sizeof(ushort) / sizeof(byte);
length = (((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte);
}
SequentialScan:
#endif
while (length >= 4)
{
length -= 4;
pCh -= 4;

if (*(pCh + 3) == value)
goto Found3;
if (*(pCh + 2) == value)
goto Found2;
if (*(pCh + 1) == value)
goto Found1;
if (*pCh == value)
goto Found;
}

while (length > 0)
{
length -= 1;
pCh -= 1;

if (*pCh == value)
goto Found;
}
#if !netstandard11
// We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
// the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
if (Vector.IsHardwareAccelerated && pCh > pEndCh)
{
length = (int)((pCh - pEndCh) & ~(Vector<ushort>.Count - 1));

// Get comparison Vector
Vector<ushort> vComparison = new Vector<ushort>(value);

while (length > 0)
{
char* pStart = pCh - Vector<ushort>.Count;
// Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh (and hence pSart) is always vector aligned
Debug.Assert(((int)pStart & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
Vector<ushort> vMatches = Vector.Equals(vComparison, Unsafe.Read<Vector<ushort>>(pStart));
if (Vector<ushort>.Zero.Equals(vMatches))
{
pCh -= Vector<ushort>.Count;
length -= Vector<ushort>.Count;
continue;
}
// Find offset of last match
return (int)(pStart - pEndCh) + LocateLastFoundChar(vMatches);
}

if (pCh > pEndCh)
{
length = (int)(pCh - pEndCh);
goto SequentialScan;
}
}
#endif
return -1;
Found:
return (int)(pCh - pEndCh);
Found1:
return (int)(pCh - pEndCh) + 1;
Found2:
return (int)(pCh - pEndCh) + 2;
Found3:
return (int)(pCh - pEndCh) + 3;
}
}

#if !netstandard11
// Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand Down Expand Up @@ -204,6 +290,40 @@ private static int LocateFirstFoundChar(ulong match)
private const ulong XorPowerOfTwoToHighChar = (0x03ul |
0x02ul << 16 |
0x01ul << 32) + 1;

// Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int LocateLastFoundChar(Vector<ushort> match)
{
var vector64 = Vector.AsVectorUInt64(match);
ulong candidate = 0;
int i = Vector<ulong>.Count - 1;
// Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
for (; i >= 0; i--)
{
candidate = vector64[i];
if (candidate != 0)
{
break;
}
}

// Single LEA instruction with jitted const (using function result)
return i * 4 + LocateLastFoundChar(candidate);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int LocateLastFoundChar(ulong match)
{
// Find the most significant char that has its highest bit set
int index = 3;
while ((long)match > 0)
{
match = match << 16;
index--;
}
return index;
}
#endif
}
}
115 changes: 4 additions & 111 deletions src/System.Private.CoreLib/shared/System/String.Searching.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,6 @@ public int IndexOf(char value, StringComparison comparisonType)

public unsafe int IndexOf(char value, int startIndex, int count)
{
// These bounds checks are redundant since AsSpan does them already, but are required
// so that the correct parameter name is thrown as part of the exception.
if ((uint)startIndex > (uint)Length)
throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);

Expand Down Expand Up @@ -355,10 +353,7 @@ public int IndexOf(string value, int startIndex, int count, StringComparison com
// The character at position startIndex is included in the search. startIndex is the larger
// index within the string.
//
public int LastIndexOf(char value)
{
return LastIndexOf(value, this.Length - 1, this.Length);
}
public int LastIndexOf(char value) => SpanHelpers.LastIndexOf(ref _firstChar, value, Length);

public int LastIndexOf(char value, int startIndex)
{
Expand All @@ -376,112 +371,10 @@ public unsafe int LastIndexOf(char value, int startIndex, int count)
if ((uint)count > (uint)startIndex + 1)
throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);

fixed (char* pChars = &_firstChar)
{
char* pCh = pChars + startIndex;
char* pEndCh = pCh - count;

//We search [startIndex..EndIndex]
if (Vector.IsHardwareAccelerated && count >= Vector<ushort>.Count * 2)
{
unchecked
{
const int elementsPerByte = sizeof(ushort) / sizeof(byte);
count = (((int)pCh & (Vector<byte>.Count - 1)) / elementsPerByte) + 1;
}
}
SequentialScan:
while (count >= 4)
{
if (*pCh == value) goto ReturnIndex;
if (*(pCh - 1) == value) goto ReturnIndex1;
if (*(pCh - 2) == value) goto ReturnIndex2;
if (*(pCh - 3) == value) goto ReturnIndex3;

count -= 4;
pCh -= 4;
}

while (count > 0)
{
if (*pCh == value)
goto ReturnIndex;

count--;
pCh--;
}

if (pCh > pEndCh)
{
count = (int)((pCh - pEndCh) & ~(Vector<ushort>.Count - 1));

// Get comparison Vector
Vector<ushort> vComparison = new Vector<ushort>(value);
while (count > 0)
{
char* pStart = pCh - Vector<ushort>.Count + 1;
var vMatches = Vector.Equals(vComparison, Unsafe.ReadUnaligned<Vector<ushort>>(pStart));
if (Vector<ushort>.Zero.Equals(vMatches))
{
pCh -= Vector<ushort>.Count;
count -= Vector<ushort>.Count;
continue;
}
// Find offset of last match
return (int)(pStart - pChars) + LocateLastFoundChar(vMatches);
}

if (pCh > pEndCh)
{
unchecked
{
count = (int)(pCh - pEndCh);
}
goto SequentialScan;
}
}
return -1;

ReturnIndex3: pCh--;
ReturnIndex2: pCh--;
ReturnIndex1: pCh--;
ReturnIndex:
return (int)(pCh - pChars);
}
}

// Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int LocateLastFoundChar(Vector<ushort> match)
{
var vector64 = Vector.AsVectorUInt64(match);
ulong candidate = 0;
int i = Vector<ulong>.Count - 1;
// Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
for (; i >= 0; i--)
{
candidate = vector64[i];
if (candidate != 0)
{
break;
}
}
int startSearchAt = startIndex + 1 - count;
int result = SpanHelpers.LastIndexOf(ref Unsafe.Add(ref _firstChar, startSearchAt), value, count);

// Single LEA instruction with jitted const (using function result)
return i * 4 + LocateLastFoundChar(candidate);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int LocateLastFoundChar(ulong match)
{
// Find the most significant char that has its highest bit set
int index = 3;
while ((long)match > 0)
{
match = match << 16;
index--;
}
return index;
return result == -1 ? result : result + startSearchAt;
}

// Returns the index of the last occurrence of any specified character in the current instance.
Expand Down

0 comments on commit 18dee6b

Please sign in to comment.