Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make getting a SourceText from an IDE parsed tree not hit the LOH. #73494

Merged
merged 22 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
using System.Diagnostics.CodeAnalysis;
using System.Text;
using System.Threading;
using Microsoft.CodeAnalysis.Host;
using Microsoft.CodeAnalysis.Shared.Extensions;
using Microsoft.CodeAnalysis.Text;

namespace Microsoft.CodeAnalysis.CSharp;
Expand All @@ -25,7 +27,13 @@ private sealed class ParsedSyntaxTree : CSharpSyntaxTree

private SourceText? _lazyText;

public ParsedSyntaxTree(SourceText? lazyText, CSharpSyntaxNode root, CSharpParseOptions options, string filePath, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm)
public ParsedSyntaxTree(
SourceText? lazyText,
CSharpSyntaxNode root,
CSharpParseOptions options,
string filePath,
Encoding? encoding,
SourceHashAlgorithm checksumAlgorithm)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just wrapping.

{
_lazyText = lazyText;
_root = CloneNodeAsRoot(root);
Expand All @@ -40,7 +48,8 @@ public override SourceText GetText(CancellationToken cancellationToken)
{
if (_lazyText == null)
{
Interlocked.CompareExchange(ref _lazyText, GetRoot(cancellationToken).GetText(Encoding, _checksumAlgorithm), null);
var sourceText = SourceTextExtensions.CreateSourceText(GetRoot(cancellationToken), Encoding, _checksumAlgorithm, cancellationToken);
Interlocked.CompareExchange(ref _lazyText, sourceText, null);
}

return _lazyText;
Expand Down Expand Up @@ -68,10 +77,20 @@ public override bool TryGetRoot([NotNullWhen(true)] out CSharpSyntaxNode? root)
}

public override SyntaxTree WithRootAndOptions(SyntaxNode root, ParseOptions options)
=> (root == _root && options == Options) ? this : new ParsedSyntaxTree((root == _root) ? _lazyText : null, (CSharpSyntaxNode)root, (CSharpParseOptions)options, FilePath, Encoding, _checksumAlgorithm);
=> root == _root && options == Options
? this
: new ParsedSyntaxTree(
root == _root ? _lazyText : null,
(CSharpSyntaxNode)root,
(CSharpParseOptions)options,
FilePath,
Encoding,
_checksumAlgorithm);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just wrapping. no change.


public override SyntaxTree WithFilePath(string path)
=> (path == FilePath) ? this : new ParsedSyntaxTree(_lazyText, _root, Options, path, Encoding, _checksumAlgorithm);
=> path == FilePath
? this
: new ParsedSyntaxTree(_lazyText, _root, Options, path, Encoding, _checksumAlgorithm);

public override SyntaxReference GetReference(SyntaxNode node)
=> new NodeSyntaxReference(node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

using System;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using Microsoft.CodeAnalysis.Host;
Expand All @@ -17,15 +19,20 @@ namespace Microsoft.CodeAnalysis.Shared.Extensions;

internal static partial class SourceTextExtensions
{
// char pooled memory : 8K * 256 = 2MB
private const int ObjectPoolCount = 1024;

// char array length: 4k characters. 4K * 1024 (object pool count) * 2 (bytes per char) = 8MB
private const int CharArrayLength = 4 * 1024;

// 32k characters. Equivalent to 64KB in memory bytes. Will not be put into the LOH.
public const int SourceTextLengthThreshold = 32 * 1024;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in double thsi value from waht it was before.


/// <summary>
/// Note: there is a strong invariant that you only get arrays back from this that are exactly <see
/// cref="CharArrayLength"/> long. Putting arrays back into this of the wrong length will result in broken
/// behavior. Do not expose this pool outside of this class.
/// </summary>
private static readonly ObjectPool<char[]> s_charArrayPool = new(() => new char[CharArrayLength], 256);
private static readonly ObjectPool<char[]> s_charArrayPool = new(() => new char[CharArrayLength], ObjectPoolCount);

public static void GetLineAndOffset(this SourceText text, int position, out int lineNumber, out int offset)
{
Expand Down Expand Up @@ -168,9 +175,6 @@ public static int IndexOfNonWhiteSpace(this SourceText text, int start, int leng
return -1;
}

// 32KB. comes from SourceText char buffer size and less than large object size
internal const int SourceTextLengthThreshold = 32 * 1024 / sizeof(char);

public static void WriteTo(this SourceText sourceText, ObjectWriter writer, CancellationToken cancellationToken)
{
// Source length
Expand Down Expand Up @@ -229,20 +233,103 @@ private static void WriteChunksTo(SourceText sourceText, ObjectWriter writer, in

public static SourceText ReadFrom(ITextFactoryService textService, ObjectReader reader, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm, CancellationToken cancellationToken)
{
using var textReader = ObjectReaderTextReader.Create(reader);
using var textReader = CharArrayChunkTextReader.CreateFromObjectReader(reader);

return textService.CreateText(textReader, encoding, checksumAlgorithm, cancellationToken);
}

private sealed class ObjectReaderTextReader : TextReaderWithLength
public static SourceText CreateSourceText(
SyntaxNode node, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm, CancellationToken cancellationToken)
{
// If this node is small enough to not go into the LOH, we can just fast path directly to creating a SourceText from it.
var totalLength = node.FullWidth();
if (totalLength <= SourceTextLengthThreshold)
return SourceText.From(node.ToFullString(), encoding, checksumAlgorithm);

// Allocate the space to write the node into. Explicitly chunked so that nothing goes into the LOH.
var chunks = CreateChunks(totalLength);

// Write the node into that temp space.
using var chunkWriter = new CharArrayChunkTextWriter(totalLength, chunks, encoding!, cancellationToken);
node.WriteTo(chunkWriter);
Contract.ThrowIfTrue(totalLength != chunkWriter.Position);

// Call into the text service to make us a SourceText from the chunks. Disposal of this reader will free all
// the intermediary chunks we allocated.

Contract.ThrowIfTrue(chunks.Any(static (c, s) => c.Length != s, CharArrayLength));

using var chunkReader = new CharArrayChunkTextReader(chunks, totalLength);
var result = SourceText.From(chunkReader, totalLength, encoding, checksumAlgorithm);
Copy link
Contributor

@ToddGrun ToddGrun May 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SourceText.From

It feels like there is an inefficiency here. This SourceText.From call takes the reader you have built up over these chunks, detects it's large, and will call LargeText.Decode, which will create chunks over the data.

I feel like we're jumping through hoops to do this at this level, when maybe it should be at the compiler level. #Resolved

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes. but we have no efficient paths at all. We can simultaneously explore a different way of the compiler exposing things (like wrapping an ImmutableArray<ImmutableArray<char>> chunks system), but that can happen outdie of this PR.

Note: while any approach today involves multiple copies, this approach at least makes one of the copy non LOH and also pooled.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough

Contract.ThrowIfTrue(totalLength != chunkReader.Position);

return result;

static ImmutableArray<char[]> CreateChunks(int totalLength)
{
var numberOfChunks = 1 + (totalLength / CharArrayLength);
var buffer = new FixedSizeArrayBuilder<char[]>(numberOfChunks);
for (var i = 0; i < numberOfChunks; i++)
buffer.Add(s_charArrayPool.Allocate());

return buffer.MoveToImmutable();
}
}

private static int GetIndexFromPosition(int position) => position / CharArrayLength;
private static int GetColumnFromPosition(int position) => position % CharArrayLength;

private sealed class CharArrayChunkTextWriter(
int totalLength, ImmutableArray<char[]> chunks, Encoding encoding, CancellationToken cancellationToken) : TextWriter
{
private readonly int _totalLength = totalLength;
private readonly ImmutableArray<char[]> _chunks = chunks;
private readonly CancellationToken _cancellationToken = cancellationToken;

/// <summary>
/// Public so that caller can assert that writing out the text actually wrote out the full text of the node.
/// </summary>
public int Position { get; private set; }

public override Encoding Encoding { get; } = encoding;

public override void Write(string? value)
{
Contract.ThrowIfNull(value);

var valueSpan = value.AsSpan();
while (valueSpan.Length > 0)
Copy link
Contributor

@ToddGrun ToddGrun May 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

while (valueSpan.Length > 0)

feel free to ignore this, but I find something like the following a bit easier to read:

var index = 0;
foreach (var chunk in _chunks)
{
    var copyLength = Math.Min(value.Length - index, chunk.Length);
    value.CopyTo(index, chunk, 0, copyLength);

    index += chunk.Length;
}
``` #Resolved

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems not correct since you're restarting at the first chunk each time a string is written in.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, duh, multiple write calls

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, my current rewrite of this doesn't really look simpler than what you've got

{
_cancellationToken.ThrowIfCancellationRequested();

var chunk = _chunks[GetIndexFromPosition(Position)];
Contract.ThrowIfTrue(chunk.Length != CharArrayLength);

var chunkIndex = GetColumnFromPosition(Position);
Contract.ThrowIfTrue(chunkIndex >= CharArrayLength);

var count = Math.Min(valueSpan.Length, CharArrayLength - chunkIndex);
valueSpan[..count].CopyTo(chunk.AsSpan().Slice(chunkIndex, count));

Position += count;
valueSpan = valueSpan[count..];
}

Contract.ThrowIfTrue(Position > _totalLength);
}
}

private sealed class CharArrayChunkTextReader(ImmutableArray<char[]> chunks, int length) : TextReaderWithLength(length)
{
private readonly ImmutableArray<char[]> _chunks;
private readonly int _chunkSize;
private bool _disposed;
private readonly ImmutableArray<char[]> _chunks = chunks;
private bool _disposed = false;

private int _position;
/// <summary>
/// Public so that the caller can assert that the new SourceText read all the way to the end of this successfully.
CyrusNajmabadi marked this conversation as resolved.
Show resolved Hide resolved
/// </summary>
public int Position { get; private set; }

public static TextReader Create(ObjectReader reader)
public static TextReader CreateFromObjectReader(ObjectReader reader)
{
var length = reader.ReadInt32();
if (length < SourceTextLengthThreshold)
Expand All @@ -256,7 +343,7 @@ public static TextReader Create(ObjectReader reader)
var numberOfChunks = reader.ReadInt32();

// read as chunks
using var _ = ArrayBuilder<char[]>.GetInstance(numberOfChunks, out var chunks);
var chunks = new FixedSizeArrayBuilder<char[]>(numberOfChunks);

var offset = 0;
for (var i = 0; i < numberOfChunks; i++)
Expand All @@ -279,17 +366,11 @@ public static TextReader Create(ObjectReader reader)
}

Contract.ThrowIfFalse(offset == length);
return new ObjectReaderTextReader(chunks.ToImmutableAndClear(), chunkSize, length);
}

private ObjectReaderTextReader(ImmutableArray<char[]> chunks, int chunkSize, int length)
: base(length)
{
_chunks = chunks;
_chunkSize = chunkSize;
_disposed = false;
Contract.ThrowIfTrue(chunkSize != CharArrayLength);
Contract.ThrowIfTrue(chunks.Any(static (c, s) => c.Length != s, chunkSize));
var chunksArray = chunks.MoveToImmutable();
Contract.ThrowIfTrue(chunksArray.Any(static (c, s) => c.Length != s, CharArrayLength));

return new CharArrayChunkTextReader(chunksArray, length);
}

protected override void Dispose(bool disposing)
Expand All @@ -308,53 +389,49 @@ protected override void Dispose(bool disposing)

public override int Peek()
{
if (_position >= Length)
{
if (Position >= Length)
return -1;
}

return Read(_position);
return Read(Position);
}

public override int Read()
{
if (_position >= Length)
{
if (Position >= Length)
return -1;
}

return Read(_position++);
return Read(Position++);
}

private int Read(int position)
{
var chunkIndex = GetIndexFromPosition(position);
var chunkColumn = GetColumnFromPosition(position);

return _chunks[chunkIndex][chunkColumn];
}

public override int Read(char[] buffer, int index, int count)
{
if (buffer == null)
{
throw new ArgumentNullException(nameof(buffer));
}

if (index < 0 || index >= buffer.Length)
{
throw new ArgumentOutOfRangeException(nameof(index));
}

if (count < 0 || (index + count) > buffer.Length)
{
throw new ArgumentOutOfRangeException(nameof(count));
}

// check quick bail out
if (count == 0)
{
return 0;
}

// adjust to actual char to read
var totalCharsToRead = Math.Min(count, Length - _position);
var totalCharsToRead = Math.Min(count, Length - Position);
count = totalCharsToRead;

var chunkIndex = GetIndexFromPosition(_position);
var chunkStartOffset = GetColumnFromPosition(_position);
var chunkIndex = GetIndexFromPosition(Position);
var chunkStartOffset = GetColumnFromPosition(Position);

while (true)
{
Expand All @@ -374,19 +451,9 @@ public override int Read(char[] buffer, int index, int count)
chunkIndex++;
}

_position += totalCharsToRead;
Position += totalCharsToRead;
Contract.ThrowIfTrue(Position > Length);
return totalCharsToRead;
}

private int Read(int position)
{
var chunkIndex = GetIndexFromPosition(position);
var chunkColumn = GetColumnFromPosition(position);

return _chunks[chunkIndex][chunkColumn];
}

private int GetIndexFromPosition(int position) => position / _chunkSize;
private int GetColumnFromPosition(int position) => position % _chunkSize;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,10 @@

#nullable disable

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;

namespace Microsoft.CodeAnalysis.Host;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,25 @@ Namespace Microsoft.CodeAnalysis.VisualBasic

Private _lazyText As SourceText

Public Sub New(lazyText As SourceText, root As VisualBasicSyntaxNode, options As VisualBasicParseOptions, filePath As String, encoding As Encoding, checksumAlgorithm As SourceHashAlgorithm)
Public Sub New(
lazyText As SourceText,
root As VisualBasicSyntaxNode,
options As VisualBasicParseOptions,
filePath As String,
encoding As Encoding,
checksumAlgorithm As SourceHashAlgorithm)
_lazyText = lazyText
_root = CloneNodeAsRoot(root)
_checksumAlgorithm = checksumAlgorithm

Me.Encoding = encoding
Me.Options = options
Me.FilePath = filePath
End Sub

Public Overrides Function GetText(Optional cancellationToken As CancellationToken = Nothing) As SourceText
If _lazyText Is Nothing Then
Interlocked.CompareExchange(_lazyText, GetRoot(cancellationToken).GetText(Encoding, _checksumAlgorithm), Nothing)
Dim text = SourceTextExtensions.CreateSourceText(GetRoot(cancellationToken), Encoding, _checksumAlgorithm, cancellationToken)
Interlocked.CompareExchange(_lazyText, text, Nothing)
End If

Return _lazyText
Expand Down Expand Up @@ -71,7 +77,13 @@ Namespace Microsoft.CodeAnalysis.VisualBasic
Public Overrides Function WithRootAndOptions(root As SyntaxNode, options As ParseOptions) As SyntaxTree
Return If(ReferenceEquals(root, _root) AndAlso options = Me.Options,
Me,
New ParsedSyntaxTree(If(ReferenceEquals(root, _root), _lazyText, Nothing), DirectCast(root, VisualBasicSyntaxNode), DirectCast(options, VisualBasicParseOptions), FilePath, Encoding, _checksumAlgorithm))
New ParsedSyntaxTree(
If(ReferenceEquals(root, _root), _lazyText, Nothing),
DirectCast(root, VisualBasicSyntaxNode),
DirectCast(options, VisualBasicParseOptions),
FilePath,
Encoding,
_checksumAlgorithm))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just wrapping. no change.

End Function

Public Overrides Function WithFilePath(path As String) As SyntaxTree
Expand Down
Loading