-
Notifications
You must be signed in to change notification settings - Fork 4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make getting a SourceText from an IDE parsed tree not hit the LOH. #73494
Changes from all commits
fbb2d44
87b3c8b
56e4d13
8384a7f
65b3b71
dcdd431
502b651
b46e963
09db6ea
c2c2c8e
6c8d03a
e873caa
1fd8b14
40ba159
9502336
6ab93f2
75354ef
45c2f71
6e25f22
f84f8b7
a2eb513
1f23973
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,8 @@ | |
using System.Diagnostics.CodeAnalysis; | ||
using System.Text; | ||
using System.Threading; | ||
using Microsoft.CodeAnalysis.Host; | ||
using Microsoft.CodeAnalysis.Shared.Extensions; | ||
using Microsoft.CodeAnalysis.Text; | ||
|
||
namespace Microsoft.CodeAnalysis.CSharp; | ||
|
@@ -25,7 +27,13 @@ private sealed class ParsedSyntaxTree : CSharpSyntaxTree | |
|
||
private SourceText? _lazyText; | ||
|
||
public ParsedSyntaxTree(SourceText? lazyText, CSharpSyntaxNode root, CSharpParseOptions options, string filePath, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm) | ||
public ParsedSyntaxTree( | ||
SourceText? lazyText, | ||
CSharpSyntaxNode root, | ||
CSharpParseOptions options, | ||
string filePath, | ||
Encoding? encoding, | ||
SourceHashAlgorithm checksumAlgorithm) | ||
{ | ||
_lazyText = lazyText; | ||
_root = CloneNodeAsRoot(root); | ||
|
@@ -40,7 +48,8 @@ public override SourceText GetText(CancellationToken cancellationToken) | |
{ | ||
if (_lazyText == null) | ||
{ | ||
Interlocked.CompareExchange(ref _lazyText, GetRoot(cancellationToken).GetText(Encoding, _checksumAlgorithm), null); | ||
var sourceText = SourceTextExtensions.CreateSourceText(GetRoot(cancellationToken), Encoding, _checksumAlgorithm, cancellationToken); | ||
Interlocked.CompareExchange(ref _lazyText, sourceText, null); | ||
} | ||
|
||
return _lazyText; | ||
|
@@ -68,10 +77,20 @@ public override bool TryGetRoot([NotNullWhen(true)] out CSharpSyntaxNode? root) | |
} | ||
|
||
public override SyntaxTree WithRootAndOptions(SyntaxNode root, ParseOptions options) | ||
=> (root == _root && options == Options) ? this : new ParsedSyntaxTree((root == _root) ? _lazyText : null, (CSharpSyntaxNode)root, (CSharpParseOptions)options, FilePath, Encoding, _checksumAlgorithm); | ||
=> root == _root && options == Options | ||
? this | ||
: new ParsedSyntaxTree( | ||
root == _root ? _lazyText : null, | ||
(CSharpSyntaxNode)root, | ||
(CSharpParseOptions)options, | ||
FilePath, | ||
Encoding, | ||
_checksumAlgorithm); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just wrapping. no change. |
||
|
||
public override SyntaxTree WithFilePath(string path) | ||
=> (path == FilePath) ? this : new ParsedSyntaxTree(_lazyText, _root, Options, path, Encoding, _checksumAlgorithm); | ||
=> path == FilePath | ||
? this | ||
: new ParsedSyntaxTree(_lazyText, _root, Options, path, Encoding, _checksumAlgorithm); | ||
|
||
public override SyntaxReference GetReference(SyntaxNode node) | ||
=> new NodeSyntaxReference(node); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,9 @@ | |
|
||
using System; | ||
using System.Collections.Immutable; | ||
using System.Diagnostics; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading; | ||
using Microsoft.CodeAnalysis.Host; | ||
|
@@ -17,15 +19,20 @@ namespace Microsoft.CodeAnalysis.Shared.Extensions; | |
|
||
internal static partial class SourceTextExtensions | ||
{ | ||
// char pooled memory : 8K * 256 = 2MB | ||
private const int ObjectPoolCount = 1024; | ||
|
||
// char array length: 4k characters. 4K * 1024 (object pool count) * 2 (bytes per char) = 8MB | ||
private const int CharArrayLength = 4 * 1024; | ||
|
||
// 32k characters. Equivalent to 64KB in memory bytes. Will not be put into the LOH. | ||
public const int SourceTextLengthThreshold = 32 * 1024; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in double thsi value from waht it was before. |
||
|
||
/// <summary> | ||
/// Note: there is a strong invariant that you only get arrays back from this that are exactly <see | ||
/// cref="CharArrayLength"/> long. Putting arrays back into this of the wrong length will result in broken | ||
/// behavior. Do not expose this pool outside of this class. | ||
/// </summary> | ||
private static readonly ObjectPool<char[]> s_charArrayPool = new(() => new char[CharArrayLength], 256); | ||
private static readonly ObjectPool<char[]> s_charArrayPool = new(() => new char[CharArrayLength], ObjectPoolCount); | ||
|
||
public static void GetLineAndOffset(this SourceText text, int position, out int lineNumber, out int offset) | ||
{ | ||
|
@@ -168,9 +175,6 @@ public static int IndexOfNonWhiteSpace(this SourceText text, int start, int leng | |
return -1; | ||
} | ||
|
||
// 32KB. comes from SourceText char buffer size and less than large object size | ||
internal const int SourceTextLengthThreshold = 32 * 1024 / sizeof(char); | ||
|
||
public static void WriteTo(this SourceText sourceText, ObjectWriter writer, CancellationToken cancellationToken) | ||
{ | ||
// Source length | ||
|
@@ -229,20 +233,103 @@ private static void WriteChunksTo(SourceText sourceText, ObjectWriter writer, in | |
|
||
public static SourceText ReadFrom(ITextFactoryService textService, ObjectReader reader, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm, CancellationToken cancellationToken) | ||
{ | ||
using var textReader = ObjectReaderTextReader.Create(reader); | ||
using var textReader = CharArrayChunkTextReader.CreateFromObjectReader(reader); | ||
|
||
return textService.CreateText(textReader, encoding, checksumAlgorithm, cancellationToken); | ||
} | ||
|
||
private sealed class ObjectReaderTextReader : TextReaderWithLength | ||
public static SourceText CreateSourceText( | ||
SyntaxNode node, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm, CancellationToken cancellationToken) | ||
{ | ||
// If this node is small enough to not go into the LOH, we can just fast path directly to creating a SourceText from it. | ||
var totalLength = node.FullWidth(); | ||
if (totalLength <= SourceTextLengthThreshold) | ||
return SourceText.From(node.ToFullString(), encoding, checksumAlgorithm); | ||
|
||
// Allocate the space to write the node into. Explicitly chunked so that nothing goes into the LOH. | ||
var chunks = CreateChunks(totalLength); | ||
|
||
// Write the node into that temp space. | ||
using var chunkWriter = new CharArrayChunkTextWriter(totalLength, chunks, encoding!, cancellationToken); | ||
node.WriteTo(chunkWriter); | ||
Contract.ThrowIfTrue(totalLength != chunkWriter.Position); | ||
|
||
// Call into the text service to make us a SourceText from the chunks. Disposal of this reader will free all | ||
// the intermediary chunks we allocated. | ||
|
||
Contract.ThrowIfTrue(chunks.Any(static (c, s) => c.Length != s, CharArrayLength)); | ||
|
||
using var chunkReader = new CharArrayChunkTextReader(chunks, totalLength); | ||
var result = SourceText.From(chunkReader, totalLength, encoding, checksumAlgorithm); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It feels like there is an inefficiency here. This SourceText.From call takes the reader you have built up over these chunks, detects it's large, and will call LargeText.Decode, which will create chunks over the data. I feel like we're jumping through hoops to do this at this level, when maybe it should be at the compiler level. #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes. but we have no efficient paths at all. We can simultaneously explore a different way of the compiler exposing things (like wrapping an Note: while any approach today involves multiple copies, this approach at least makes one of the copy non LOH and also pooled. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fair enough |
||
Contract.ThrowIfTrue(totalLength != chunkReader.Position); | ||
|
||
return result; | ||
|
||
static ImmutableArray<char[]> CreateChunks(int totalLength) | ||
{ | ||
var numberOfChunks = 1 + (totalLength / CharArrayLength); | ||
var buffer = new FixedSizeArrayBuilder<char[]>(numberOfChunks); | ||
for (var i = 0; i < numberOfChunks; i++) | ||
buffer.Add(s_charArrayPool.Allocate()); | ||
|
||
return buffer.MoveToImmutable(); | ||
} | ||
} | ||
|
||
private static int GetIndexFromPosition(int position) => position / CharArrayLength; | ||
private static int GetColumnFromPosition(int position) => position % CharArrayLength; | ||
|
||
private sealed class CharArrayChunkTextWriter( | ||
int totalLength, ImmutableArray<char[]> chunks, Encoding encoding, CancellationToken cancellationToken) : TextWriter | ||
{ | ||
private readonly int _totalLength = totalLength; | ||
private readonly ImmutableArray<char[]> _chunks = chunks; | ||
private readonly CancellationToken _cancellationToken = cancellationToken; | ||
|
||
/// <summary> | ||
/// Public so that caller can assert that writing out the text actually wrote out the full text of the node. | ||
/// </summary> | ||
public int Position { get; private set; } | ||
|
||
public override Encoding Encoding { get; } = encoding; | ||
|
||
public override void Write(string? value) | ||
{ | ||
Contract.ThrowIfNull(value); | ||
|
||
var valueSpan = value.AsSpan(); | ||
while (valueSpan.Length > 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. feel free to ignore this, but I find something like the following a bit easier to read:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That seems not correct since you're restarting at the first chunk each time a string is written in. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, duh, multiple write calls There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, my current rewrite of this doesn't really look simpler than what you've got |
||
{ | ||
_cancellationToken.ThrowIfCancellationRequested(); | ||
|
||
var chunk = _chunks[GetIndexFromPosition(Position)]; | ||
Contract.ThrowIfTrue(chunk.Length != CharArrayLength); | ||
|
||
var chunkIndex = GetColumnFromPosition(Position); | ||
Contract.ThrowIfTrue(chunkIndex >= CharArrayLength); | ||
|
||
var count = Math.Min(valueSpan.Length, CharArrayLength - chunkIndex); | ||
valueSpan[..count].CopyTo(chunk.AsSpan().Slice(chunkIndex, count)); | ||
|
||
Position += count; | ||
valueSpan = valueSpan[count..]; | ||
} | ||
|
||
Contract.ThrowIfTrue(Position > _totalLength); | ||
} | ||
} | ||
|
||
private sealed class CharArrayChunkTextReader(ImmutableArray<char[]> chunks, int length) : TextReaderWithLength(length) | ||
{ | ||
private readonly ImmutableArray<char[]> _chunks; | ||
private readonly int _chunkSize; | ||
private bool _disposed; | ||
private readonly ImmutableArray<char[]> _chunks = chunks; | ||
private bool _disposed = false; | ||
|
||
private int _position; | ||
/// <summary> | ||
/// Public so that the caller can assert that the new SourceText read all the way to the end of this successfully. | ||
CyrusNajmabadi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// </summary> | ||
public int Position { get; private set; } | ||
|
||
public static TextReader Create(ObjectReader reader) | ||
public static TextReader CreateFromObjectReader(ObjectReader reader) | ||
{ | ||
var length = reader.ReadInt32(); | ||
if (length < SourceTextLengthThreshold) | ||
|
@@ -256,7 +343,7 @@ public static TextReader Create(ObjectReader reader) | |
var numberOfChunks = reader.ReadInt32(); | ||
|
||
// read as chunks | ||
using var _ = ArrayBuilder<char[]>.GetInstance(numberOfChunks, out var chunks); | ||
var chunks = new FixedSizeArrayBuilder<char[]>(numberOfChunks); | ||
|
||
var offset = 0; | ||
for (var i = 0; i < numberOfChunks; i++) | ||
|
@@ -279,17 +366,11 @@ public static TextReader Create(ObjectReader reader) | |
} | ||
|
||
Contract.ThrowIfFalse(offset == length); | ||
return new ObjectReaderTextReader(chunks.ToImmutableAndClear(), chunkSize, length); | ||
} | ||
|
||
private ObjectReaderTextReader(ImmutableArray<char[]> chunks, int chunkSize, int length) | ||
: base(length) | ||
{ | ||
_chunks = chunks; | ||
_chunkSize = chunkSize; | ||
_disposed = false; | ||
Contract.ThrowIfTrue(chunkSize != CharArrayLength); | ||
Contract.ThrowIfTrue(chunks.Any(static (c, s) => c.Length != s, chunkSize)); | ||
var chunksArray = chunks.MoveToImmutable(); | ||
Contract.ThrowIfTrue(chunksArray.Any(static (c, s) => c.Length != s, CharArrayLength)); | ||
|
||
return new CharArrayChunkTextReader(chunksArray, length); | ||
} | ||
|
||
protected override void Dispose(bool disposing) | ||
|
@@ -308,53 +389,49 @@ protected override void Dispose(bool disposing) | |
|
||
public override int Peek() | ||
{ | ||
if (_position >= Length) | ||
{ | ||
if (Position >= Length) | ||
return -1; | ||
} | ||
|
||
return Read(_position); | ||
return Read(Position); | ||
} | ||
|
||
public override int Read() | ||
{ | ||
if (_position >= Length) | ||
{ | ||
if (Position >= Length) | ||
return -1; | ||
} | ||
|
||
return Read(_position++); | ||
return Read(Position++); | ||
} | ||
|
||
private int Read(int position) | ||
{ | ||
var chunkIndex = GetIndexFromPosition(position); | ||
var chunkColumn = GetColumnFromPosition(position); | ||
|
||
return _chunks[chunkIndex][chunkColumn]; | ||
} | ||
|
||
public override int Read(char[] buffer, int index, int count) | ||
{ | ||
if (buffer == null) | ||
{ | ||
throw new ArgumentNullException(nameof(buffer)); | ||
} | ||
|
||
if (index < 0 || index >= buffer.Length) | ||
{ | ||
throw new ArgumentOutOfRangeException(nameof(index)); | ||
} | ||
|
||
if (count < 0 || (index + count) > buffer.Length) | ||
{ | ||
throw new ArgumentOutOfRangeException(nameof(count)); | ||
} | ||
|
||
// check quick bail out | ||
if (count == 0) | ||
{ | ||
return 0; | ||
} | ||
|
||
// adjust to actual char to read | ||
var totalCharsToRead = Math.Min(count, Length - _position); | ||
var totalCharsToRead = Math.Min(count, Length - Position); | ||
count = totalCharsToRead; | ||
|
||
var chunkIndex = GetIndexFromPosition(_position); | ||
var chunkStartOffset = GetColumnFromPosition(_position); | ||
var chunkIndex = GetIndexFromPosition(Position); | ||
var chunkStartOffset = GetColumnFromPosition(Position); | ||
|
||
while (true) | ||
{ | ||
|
@@ -374,19 +451,9 @@ public override int Read(char[] buffer, int index, int count) | |
chunkIndex++; | ||
} | ||
|
||
_position += totalCharsToRead; | ||
Position += totalCharsToRead; | ||
Contract.ThrowIfTrue(Position > Length); | ||
return totalCharsToRead; | ||
} | ||
|
||
private int Read(int position) | ||
{ | ||
var chunkIndex = GetIndexFromPosition(position); | ||
var chunkColumn = GetColumnFromPosition(position); | ||
|
||
return _chunks[chunkIndex][chunkColumn]; | ||
} | ||
|
||
private int GetIndexFromPosition(int position) => position / _chunkSize; | ||
private int GetColumnFromPosition(int position) => position % _chunkSize; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,19 +24,25 @@ Namespace Microsoft.CodeAnalysis.VisualBasic | |
|
||
Private _lazyText As SourceText | ||
|
||
Public Sub New(lazyText As SourceText, root As VisualBasicSyntaxNode, options As VisualBasicParseOptions, filePath As String, encoding As Encoding, checksumAlgorithm As SourceHashAlgorithm) | ||
Public Sub New( | ||
lazyText As SourceText, | ||
root As VisualBasicSyntaxNode, | ||
options As VisualBasicParseOptions, | ||
filePath As String, | ||
encoding As Encoding, | ||
checksumAlgorithm As SourceHashAlgorithm) | ||
_lazyText = lazyText | ||
_root = CloneNodeAsRoot(root) | ||
_checksumAlgorithm = checksumAlgorithm | ||
|
||
Me.Encoding = encoding | ||
Me.Options = options | ||
Me.FilePath = filePath | ||
End Sub | ||
|
||
Public Overrides Function GetText(Optional cancellationToken As CancellationToken = Nothing) As SourceText | ||
If _lazyText Is Nothing Then | ||
Interlocked.CompareExchange(_lazyText, GetRoot(cancellationToken).GetText(Encoding, _checksumAlgorithm), Nothing) | ||
Dim text = SourceTextExtensions.CreateSourceText(GetRoot(cancellationToken), Encoding, _checksumAlgorithm, cancellationToken) | ||
Interlocked.CompareExchange(_lazyText, text, Nothing) | ||
End If | ||
|
||
Return _lazyText | ||
|
@@ -71,7 +77,13 @@ Namespace Microsoft.CodeAnalysis.VisualBasic | |
Public Overrides Function WithRootAndOptions(root As SyntaxNode, options As ParseOptions) As SyntaxTree | ||
Return If(ReferenceEquals(root, _root) AndAlso options = Me.Options, | ||
Me, | ||
New ParsedSyntaxTree(If(ReferenceEquals(root, _root), _lazyText, Nothing), DirectCast(root, VisualBasicSyntaxNode), DirectCast(options, VisualBasicParseOptions), FilePath, Encoding, _checksumAlgorithm)) | ||
New ParsedSyntaxTree( | ||
If(ReferenceEquals(root, _root), _lazyText, Nothing), | ||
DirectCast(root, VisualBasicSyntaxNode), | ||
DirectCast(options, VisualBasicParseOptions), | ||
FilePath, | ||
Encoding, | ||
_checksumAlgorithm)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just wrapping. no change. |
||
End Function | ||
|
||
Public Overrides Function WithFilePath(path As String) As SyntaxTree | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just wrapping.