diff --git a/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo.cs b/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo.cs index 9ff2a6effe35e..2e23cef362c17 100644 --- a/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo.cs +++ b/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo.cs @@ -3,16 +3,15 @@ // See the LICENSE file in the project root for more information. using System; -using System.Collections.Generic; using System.Collections.Immutable; using System.Diagnostics; using System.Linq; +using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; using Microsoft.CodeAnalysis.Collections; using Microsoft.CodeAnalysis.PooledObjects; using Microsoft.CodeAnalysis.Shared.Collections; -using Microsoft.CodeAnalysis.Utilities; using Roslyn.Utilities; namespace Microsoft.CodeAnalysis.FindSymbols @@ -71,15 +70,6 @@ public MultiDictionary.ValueSet GetExtensionMethodI public bool ContainsExtensionMethod => _receiverTypeNameToExtensionMethodMap?.Count > 0; private SpellChecker? _spellChecker; - private SpellChecker SpellChecker - { - get - { - _spellChecker ??= CreateSpellChecker(Checksum, _nodes); - - return _spellChecker.Value; - } - } private SymbolTreeInfo( Checksum checksum, @@ -181,11 +171,16 @@ private async Task> FuzzyFindAsync( using var similarNames = TemporaryArray.Empty; using var result = TemporaryArray.Empty; - SpellChecker.FindSimilarWords(ref similarNames.AsRef(), name, substringsAreSimilar: false); + // Ensure the spell checker is initialized. This is concurrency safe. Technically multiple threads may end + // up overwriting the field, but even if that happens, we are sure to see a fully written spell checker as + // the runtime guarantees that the initialize of the SpellChecker instnace completely written when we read + // our field. + _spellChecker ??= CreateSpellChecker(Checksum, _nodes); + _spellChecker.FindSimilarWords(ref similarNames.AsRef(), name, substringsAreSimilar: false); foreach (var similarName in similarNames) { - var symbols = await FindAsync(lazyAssembly, similarName, ignoreCase: true, cancellationToken: cancellationToken).ConfigureAwait(false); + var symbols = await FindAsync(lazyAssembly, similarName, ignoreCase: true, cancellationToken).ConfigureAwait(false); result.AddRange(symbols); } diff --git a/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo_Serialization.cs b/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo_Serialization.cs index b61b6a8bea4bd..d47727930d7cc 100644 --- a/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo_Serialization.cs +++ b/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo_Serialization.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; using Microsoft.CodeAnalysis.Collections; @@ -14,271 +15,268 @@ using Microsoft.CodeAnalysis.Storage; using Roslyn.Utilities; -namespace Microsoft.CodeAnalysis.FindSymbols +namespace Microsoft.CodeAnalysis.FindSymbols; + +internal partial class SymbolTreeInfo : IObjectWritable { - internal partial class SymbolTreeInfo : IObjectWritable + private const string PrefixSymbolTreeInfo = ""; + private static readonly Checksum SerializationFormatChecksum = Checksum.Create("25"); + + /// + /// Generalized function for loading/creating/persisting data. Used as the common core code for serialization + /// of source and metadata SymbolTreeInfos. + /// + private static async Task LoadOrCreateAsync( + SolutionServices services, + SolutionKey solutionKey, + Checksum checksum, + Func> createAsync, + string keySuffix, + CancellationToken cancellationToken) { - private const string PrefixSymbolTreeInfo = ""; - private static readonly Checksum SerializationFormatChecksum = Checksum.Create("25"); - - /// - /// Generalized function for loading/creating/persisting data. Used as the common core code for serialization - /// of source and metadata SymbolTreeInfos. - /// - private static async Task LoadOrCreateAsync( - SolutionServices services, - SolutionKey solutionKey, - Checksum checksum, - Func> createAsync, - string keySuffix, - CancellationToken cancellationToken) + using (Logger.LogBlock(FunctionId.SymbolTreeInfo_TryLoadOrCreate, cancellationToken)) { - using (Logger.LogBlock(FunctionId.SymbolTreeInfo_TryLoadOrCreate, cancellationToken)) - { - // Ok, we can use persistence. First try to load from the persistence service. The data in the - // persistence store must match the checksum passed in. + // Ok, we can use persistence. First try to load from the persistence service. The data in the + // persistence store must match the checksum passed in. - var read = await LoadAsync(services, solutionKey, checksum, checksumMustMatch: true, keySuffix, cancellationToken).ConfigureAwait(false); - if (read != null) - { - // If we were able to read something in, it's checksum better - // have matched the checksum we expected. - Debug.Assert(read.Checksum == checksum); - return read; - } + var read = await LoadAsync(services, solutionKey, checksum, checksumMustMatch: true, keySuffix, cancellationToken).ConfigureAwait(false); + if (read != null) + { + // If we were able to read something in, it's checksum better + // have matched the checksum we expected. + Debug.Assert(read.Checksum == checksum); + return read; + } - cancellationToken.ThrowIfCancellationRequested(); + cancellationToken.ThrowIfCancellationRequested(); - // Now, try to create a new instance and write it to the persistence service. - SymbolTreeInfo result; - using (Logger.LogBlock(FunctionId.SymbolTreeInfo_Create, cancellationToken)) - { - result = await createAsync(checksum).ConfigureAwait(false); - Contract.ThrowIfNull(result); - } + // Now, try to create a new instance and write it to the persistence service. + SymbolTreeInfo result; + using (Logger.LogBlock(FunctionId.SymbolTreeInfo_Create, cancellationToken)) + { + result = await createAsync(checksum).ConfigureAwait(false); + Contract.ThrowIfNull(result); + } - var persistentStorageService = services.GetPersistentStorageService(); + var persistentStorageService = services.GetPersistentStorageService(); - var storage = await persistentStorageService.GetStorageAsync(solutionKey, cancellationToken).ConfigureAwait(false); - await using var _ = storage.ConfigureAwait(false); + var storage = await persistentStorageService.GetStorageAsync(solutionKey, cancellationToken).ConfigureAwait(false); + await using var _ = storage.ConfigureAwait(false); - using (var stream = SerializableBytes.CreateWritableStream()) + using (var stream = SerializableBytes.CreateWritableStream()) + { + using (var writer = new ObjectWriter(stream, leaveOpen: true, cancellationToken)) { - using (var writer = new ObjectWriter(stream, leaveOpen: true, cancellationToken)) - { - result.WriteTo(writer); - } - - stream.Position = 0; - - var key = PrefixSymbolTreeInfo + keySuffix; - await storage.WriteStreamAsync(key, stream, checksum, cancellationToken).ConfigureAwait(false); + result.WriteTo(writer); } - return result; + stream.Position = 0; + + var key = PrefixSymbolTreeInfo + keySuffix; + await storage.WriteStreamAsync(key, stream, checksum, cancellationToken).ConfigureAwait(false); } + + return result; } + } - private static async Task LoadAsync( - SolutionServices services, - SolutionKey solutionKey, - Checksum checksum, - bool checksumMustMatch, - string keySuffix, - CancellationToken cancellationToken) - { - var persistentStorageService = services.GetPersistentStorageService(); + private static async Task LoadAsync( + SolutionServices services, + SolutionKey solutionKey, + Checksum checksum, + bool checksumMustMatch, + string keySuffix, + CancellationToken cancellationToken) + { + var persistentStorageService = services.GetPersistentStorageService(); - var storage = await persistentStorageService.GetStorageAsync(solutionKey, cancellationToken).ConfigureAwait(false); - await using var _ = storage.ConfigureAwait(false); + var storage = await persistentStorageService.GetStorageAsync(solutionKey, cancellationToken).ConfigureAwait(false); + await using var _ = storage.ConfigureAwait(false); - // Get the unique key to identify our data. - var key = PrefixSymbolTreeInfo + keySuffix; + // Get the unique key to identify our data. + var key = PrefixSymbolTreeInfo + keySuffix; - // If the checksum doesn't need to match, then we can pass in 'null' here allowing any result to be found. - using var stream = await storage.ReadStreamAsync(key, checksumMustMatch ? checksum : null, cancellationToken).ConfigureAwait(false); - using var reader = ObjectReader.TryGetReader(stream, cancellationToken: cancellationToken); + // If the checksum doesn't need to match, then we can pass in 'null' here allowing any result to be found. + using var stream = await storage.ReadStreamAsync(key, checksumMustMatch ? checksum : null, cancellationToken).ConfigureAwait(false); + using var reader = ObjectReader.TryGetReader(stream, cancellationToken: cancellationToken); - // We have some previously persisted data. Attempt to read it back. - // If we're able to, and the version of the persisted data matches - // our version, then we can reuse this instance. - return TryReadSymbolTreeInfo(reader, checksum); - } + // We have some previously persisted data. Attempt to read it back. + // If we're able to, and the version of the persisted data matches + // our version, then we can reuse this instance. + return TryReadSymbolTreeInfo(reader, checksum); + } - bool IObjectWritable.ShouldReuseInSerialization => true; + bool IObjectWritable.ShouldReuseInSerialization => true; - public void WriteTo(ObjectWriter writer) + public void WriteTo(ObjectWriter writer) + { + writer.WriteInt32(_nodes.Length); + foreach (var group in GroupByName(_nodes.AsMemory())) { - writer.WriteInt32(_nodes.Length); - foreach (var group in GroupByName(_nodes.AsMemory())) + writer.WriteString(group.Span[0].Name); + writer.WriteInt32(group.Length); + foreach (var item in group.Span) { - writer.WriteString(group.Span[0].Name); - writer.WriteInt32(group.Length); - foreach (var item in group.Span) - { - writer.WriteInt32(item.ParentIndex); - } + writer.WriteInt32(item.ParentIndex); } + } - writer.WriteInt32(_inheritanceMap.Keys.Count); - foreach (var kvp in _inheritanceMap) - { - writer.WriteInt32(kvp.Key); - writer.WriteInt32(kvp.Value.Count); - - foreach (var v in kvp.Value) - { - writer.WriteInt32(v); - } - } + writer.WriteInt32(_inheritanceMap.Keys.Count); + foreach (var kvp in _inheritanceMap) + { + writer.WriteInt32(kvp.Key); + writer.WriteInt32(kvp.Value.Count); - if (_receiverTypeNameToExtensionMethodMap == null) + foreach (var v in kvp.Value) { - writer.WriteInt32(0); + writer.WriteInt32(v); } - else + } + + if (_receiverTypeNameToExtensionMethodMap == null) + { + writer.WriteInt32(0); + } + else + { + writer.WriteInt32(_receiverTypeNameToExtensionMethodMap.Count); + foreach (var key in _receiverTypeNameToExtensionMethodMap.Keys) { - writer.WriteInt32(_receiverTypeNameToExtensionMethodMap.Count); - foreach (var key in _receiverTypeNameToExtensionMethodMap.Keys) - { - writer.WriteString(key); + writer.WriteString(key); - var values = _receiverTypeNameToExtensionMethodMap[key]; - writer.WriteInt32(values.Count); + var values = _receiverTypeNameToExtensionMethodMap[key]; + writer.WriteInt32(values.Count); - foreach (var value in values) - { - writer.WriteString(value.FullyQualifiedContainerName); - writer.WriteString(value.Name); - } + foreach (var value in values) + { + writer.WriteString(value.FullyQualifiedContainerName); + writer.WriteString(value.Name); } } + } - if (!_spellChecker.HasValue) - { - writer.WriteBoolean(false); - } - else - { - writer.WriteBoolean(true); - SpellChecker.WriteTo(writer); - } + var spellChecker = _spellChecker; - return; + if (spellChecker is null) + { + writer.WriteBoolean(false); + } + else + { + writer.WriteBoolean(true); + spellChecker.WriteTo(writer); + } - // sortedNodes is an array of Node instances which is often sorted by Node.Name by the caller. This method - // produces a sequence of spans within sortedNodes for Node instances that all have the same Name, allowing - // serialization to record the string once followed by the remaining properties for the nodes in the group. - static IEnumerable> GroupByName(ReadOnlyMemory sortedNodes) - { - if (sortedNodes.IsEmpty) - yield break; + return; + + // sortedNodes is an array of Node instances which is often sorted by Node.Name by the caller. This method + // produces a sequence of spans within sortedNodes for Node instances that all have the same Name, allowing + // serialization to record the string once followed by the remaining properties for the nodes in the group. + static IEnumerable> GroupByName(ReadOnlyMemory sortedNodes) + { + if (sortedNodes.IsEmpty) + yield break; - var startIndex = 0; - var currentName = sortedNodes.Span[0].Name; - for (var i = 1; i < sortedNodes.Length; i++) + var startIndex = 0; + var currentName = sortedNodes.Span[0].Name; + for (var i = 1; i < sortedNodes.Length; i++) + { + var node = sortedNodes.Span[i]; + if (node.Name != currentName) { - var node = sortedNodes.Span[i]; - if (node.Name != currentName) - { - yield return sortedNodes[startIndex..i]; - startIndex = i; - } + yield return sortedNodes[startIndex..i]; + startIndex = i; } - - yield return sortedNodes[startIndex..sortedNodes.Length]; } + + yield return sortedNodes[startIndex..sortedNodes.Length]; } + } - private static SymbolTreeInfo? TryReadSymbolTreeInfo( - ObjectReader reader, Checksum checksum) + private static SymbolTreeInfo? TryReadSymbolTreeInfo( + ObjectReader reader, Checksum checksum) + { + if (reader == null) + return null; + + try { - if (reader == null) - return null; + var nodeCount = reader.ReadInt32(); + using var _ = ArrayBuilder.GetInstance(nodeCount, out var nodes); - try + for (var i = 0; i < nodeCount; i++) { - var nodeCount = reader.ReadInt32(); - using var _ = ArrayBuilder.GetInstance(nodeCount, out var nodes); - - for (var i = 0; i < nodeCount; i++) + var name = reader.ReadString(); + var groupCount = reader.ReadInt32(); + for (var j = 0; j < groupCount; j++) { - var name = reader.ReadString(); - var groupCount = reader.ReadInt32(); - for (var j = 0; j < groupCount; j++) - { - var parentIndex = reader.ReadInt32(); - nodes.Add(new Node(name, parentIndex)); - } + var parentIndex = reader.ReadInt32(); + nodes.Add(new Node(name, parentIndex)); } + } - var inheritanceMap = new OrderPreservingMultiDictionary(); - var inheritanceMapKeyCount = reader.ReadInt32(); - for (var i = 0; i < inheritanceMapKeyCount; i++) - { - var key = reader.ReadInt32(); - var valueCount = reader.ReadInt32(); + var inheritanceMap = new OrderPreservingMultiDictionary(); + var inheritanceMapKeyCount = reader.ReadInt32(); + for (var i = 0; i < inheritanceMapKeyCount; i++) + { + var key = reader.ReadInt32(); + var valueCount = reader.ReadInt32(); - for (var j = 0; j < valueCount; j++) - { - var value = reader.ReadInt32(); - inheritanceMap.Add(key, value); - } + for (var j = 0; j < valueCount; j++) + { + var value = reader.ReadInt32(); + inheritanceMap.Add(key, value); } + } - MultiDictionary? receiverTypeNameToExtensionMethodMap; + MultiDictionary? receiverTypeNameToExtensionMethodMap; - var keyCount = reader.ReadInt32(); - if (keyCount == 0) - { - receiverTypeNameToExtensionMethodMap = null; - } - else + var keyCount = reader.ReadInt32(); + if (keyCount == 0) + { + receiverTypeNameToExtensionMethodMap = null; + } + else + { + receiverTypeNameToExtensionMethodMap = new(); + + for (var i = 0; i < keyCount; i++) { - receiverTypeNameToExtensionMethodMap = new MultiDictionary(); + var typeName = reader.ReadString(); + var valueCount = reader.ReadInt32(); - for (var i = 0; i < keyCount; i++) + for (var j = 0; j < valueCount; j++) { - var typeName = reader.ReadString(); - var valueCount = reader.ReadInt32(); + var containerName = reader.ReadString(); + var name = reader.ReadString(); - for (var j = 0; j < valueCount; j++) - { - var containerName = reader.ReadString(); - var name = reader.ReadString(); - - receiverTypeNameToExtensionMethodMap.Add(typeName, new ExtensionMethodInfo(containerName, name)); - } + receiverTypeNameToExtensionMethodMap.Add(typeName, new ExtensionMethodInfo(containerName, name)); } } - - SpellChecker? spellChecker = null; - - var spellCheckerPersisted = reader.ReadBoolean(); - if (spellCheckerPersisted) - { - spellChecker = SpellChecker.TryReadFrom(reader); - if (spellChecker is null) - return null; - } - - var nodeArray = nodes.ToImmutableAndClear(); - - return new SymbolTreeInfo( - checksum, nodeArray, spellChecker, inheritanceMap, receiverTypeNameToExtensionMethodMap); - } - catch - { - Logger.Log(FunctionId.SymbolTreeInfo_ExceptionInCacheRead); } - return null; - } + // if we can't read in the spell checker, that's ok. This should never happen in practice (it would + // mean someone tweaked the data in the database), and we can just regenerate it from the information + // stored in 'nodes' anyways. + var spellCheckerPersisted = reader.ReadBoolean(); + var spellChecker = spellCheckerPersisted + ? SpellChecker.TryReadFrom(reader) + : null; - internal readonly partial struct TestAccessor + return new SymbolTreeInfo( + checksum, nodes.ToImmutableAndClear(), spellChecker, inheritanceMap, receiverTypeNameToExtensionMethodMap); + } + catch { - public static SymbolTreeInfo? ReadSymbolTreeInfo(ObjectReader reader, Checksum checksum) - => TryReadSymbolTreeInfo(reader, checksum); + Logger.Log(FunctionId.SymbolTreeInfo_ExceptionInCacheRead); } + + return null; + } + + internal readonly partial struct TestAccessor + { + public static SymbolTreeInfo? ReadSymbolTreeInfo(ObjectReader reader, Checksum checksum) + => TryReadSymbolTreeInfo(reader, checksum); } } diff --git a/src/Workspaces/Core/Portable/Utilities/SpellChecker.cs b/src/Workspaces/Core/Portable/Utilities/SpellChecker.cs index 1790c2d349e7b..14ed68f0d04e9 100644 --- a/src/Workspaces/Core/Portable/Utilities/SpellChecker.cs +++ b/src/Workspaces/Core/Portable/Utilities/SpellChecker.cs @@ -10,7 +10,11 @@ namespace Roslyn.Utilities { - internal readonly struct SpellChecker(Checksum checksum, BKTree bKTree) : IObjectWritable, IChecksummedObject + /// + /// Explicitly a reference type so that the consumer of this in can safely operate on an + /// instance without having to lock to ensure it sees the entirety of the value written out. + /// > + internal sealed class SpellChecker(Checksum checksum, BKTree bKTree) : IObjectWritable, IChecksummedObject { private const string SerializationFormat = "3"; diff --git a/src/Workspaces/SharedUtilitiesAndExtensions/Compiler/Core/Utilities/BKTree.Serialization.cs b/src/Workspaces/SharedUtilitiesAndExtensions/Compiler/Core/Utilities/BKTree.Serialization.cs index e5e0ecc5734e0..818daea5d941b 100644 --- a/src/Workspaces/SharedUtilitiesAndExtensions/Compiler/Core/Utilities/BKTree.Serialization.cs +++ b/src/Workspaces/SharedUtilitiesAndExtensions/Compiler/Core/Utilities/BKTree.Serialization.cs @@ -2,65 +2,52 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System.Collections.Immutable; using Microsoft.CodeAnalysis.Internal.Log; +using Microsoft.CodeAnalysis.PooledObjects; -namespace Roslyn.Utilities +namespace Roslyn.Utilities; + +internal readonly partial struct BKTree { - internal readonly partial struct BKTree + internal void WriteTo(ObjectWriter writer) { - internal void WriteTo(ObjectWriter writer) - { - writer.WriteInt32(_concatenatedLowerCaseWords.Length); - foreach (var c in _concatenatedLowerCaseWords) - { - writer.WriteChar(c); - } + writer.WriteInt32(_concatenatedLowerCaseWords.Length); + foreach (var c in _concatenatedLowerCaseWords) + writer.WriteChar(c); - writer.WriteInt32(_nodes.Length); - foreach (var node in _nodes) - { - node.WriteTo(writer); - } + writer.WriteInt32(_nodes.Length); + foreach (var node in _nodes) + node.WriteTo(writer); - writer.WriteInt32(_edges.Length); - foreach (var edge in _edges) - { - edge.WriteTo(writer); - } - } + writer.WriteInt32(_edges.Length); + foreach (var edge in _edges) + edge.WriteTo(writer); + } - internal static BKTree? ReadFrom(ObjectReader reader) + internal static BKTree? ReadFrom(ObjectReader reader) + { + try { - try - { - var concatenatedLowerCaseWords = new char[reader.ReadInt32()]; - for (var i = 0; i < concatenatedLowerCaseWords.Length; i++) - { - concatenatedLowerCaseWords[i] = reader.ReadChar(); - } + var concatenatedLowerCaseWords = new char[reader.ReadInt32()]; + for (var i = 0; i < concatenatedLowerCaseWords.Length; i++) + concatenatedLowerCaseWords[i] = reader.ReadChar(); - var nodeCount = reader.ReadInt32(); - var nodes = ImmutableArray.CreateBuilder(nodeCount); - for (var i = 0; i < nodeCount; i++) - { - nodes.Add(Node.ReadFrom(reader)); - } + var nodeCount = reader.ReadInt32(); + using var _1 = ArrayBuilder.GetInstance(nodeCount, out var nodes); + for (var i = 0; i < nodeCount; i++) + nodes.Add(Node.ReadFrom(reader)); - var edgeCount = reader.ReadInt32(); - var edges = ImmutableArray.CreateBuilder(edgeCount); - for (var i = 0; i < edgeCount; i++) - { - edges.Add(Edge.ReadFrom(reader)); - } + var edgeCount = reader.ReadInt32(); + using var _2 = ArrayBuilder.GetInstance(edgeCount, out var edges); + for (var i = 0; i < edgeCount; i++) + edges.Add(Edge.ReadFrom(reader)); - return new BKTree(concatenatedLowerCaseWords, nodes.MoveToImmutable(), edges.MoveToImmutable()); - } - catch - { - Logger.Log(FunctionId.BKTree_ExceptionInCacheRead); - return null; - } + return new BKTree(concatenatedLowerCaseWords, nodes.ToImmutableAndClear(), edges.ToImmutableAndClear()); + } + catch + { + Logger.Log(FunctionId.BKTree_ExceptionInCacheRead); + return null; } } } diff --git a/src/Workspaces/SharedUtilitiesAndExtensions/Compiler/Core/Utilities/BKTree.cs b/src/Workspaces/SharedUtilitiesAndExtensions/Compiler/Core/Utilities/BKTree.cs index 94b9eff7fb8f6..de49f93971eb8 100644 --- a/src/Workspaces/SharedUtilitiesAndExtensions/Compiler/Core/Utilities/BKTree.cs +++ b/src/Workspaces/SharedUtilitiesAndExtensions/Compiler/Core/Utilities/BKTree.cs @@ -2,75 +2,71 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Collections.Generic; using System.Collections.Immutable; -using System.Linq; using Microsoft.CodeAnalysis; -using Microsoft.CodeAnalysis.Utilities; -using System; using Microsoft.CodeAnalysis.Shared.Collections; namespace Roslyn.Utilities { /// - /// NOTE: Only use if you truly need a BK-tree. If you just want to compare words, use - /// the 'SpellChecker' type instead. - /// + /// NOTE: Only use if you truly need a BK-tree. If you just want to compare words, use the 'SpellChecker' type + /// instead. + /// /// An implementation of a Burkhard-Keller tree. Introduced in: - /// - /// 'Some approaches to best-match file searching.' - /// Communications of the ACM CACM - /// Volume 16 Issue 4, April 1973 - /// Pages 230-236 - /// http://dl.acm.org/citation.cfm?doid=362003.362025 + /// + /// 'Some approaches to best-match file searching.' Communications of the ACM CACM Volume 16 Issue 4, April 1973 + /// Pages 230-236 http://dl.acm.org/citation.cfm?doid=362003.362025. /// internal readonly partial struct BKTree { - public static readonly BKTree Empty = new( - Array.Empty(), - ImmutableArray.Empty, - ImmutableArray.Empty); - - // We have three completely flat arrays of structs. These arrays fully represent the - // BK tree. The structure is as follows: - // - // The root node is in _nodes[0]. - // - // It lists the count of edges it has. These edges are in _edges in the range - // [0*, childCount). Each edge has the index of the child node it points to, and the - // edit distance between the parent and the child. - // - // * of course '0' is only for the root case. - // - // All nodes state where in _edges their child edges range starts, so the children - // for any node are in the range[node.FirstEdgeIndex, node.FirstEdgeIndex + node.EdgeCount). - // - // Each node also has an associated string. These strings are concatenated and stored - // in _concatenatedLowerCaseWords. Each node has a TextSpan that indicates which portion - // of the character array is their string. Note: i'd like to use an immutable array - // for the characters as well. However, we need to create slices, and they need to - // work on top of an ArraySlice (which needs a char[]). The edit distance code also - // wants to work on top of raw char[]s (both for speed, and so it can pool arrays - // to prevent lots of garbage). Because of that we just keep this as a char[]. + public static readonly BKTree Empty = new([], ImmutableArray.Empty, ImmutableArray.Empty); + + /// + /// We have three completely flat arrays of structs. These arrays fully represent the BK tree. The structure + /// is as follows: + /// + /// The root node is in _nodes[0]. + /// + /// It lists the count of edges it has. These edges are in _edges in the range [0*, childCount). Each edge has + /// the index of the child node it points to, and the edit distance between the parent and the child. + /// + /// * of course '0' is only for the root case. + /// + /// All nodes state where in _edges their child edges range starts, so the children for any node are in the + /// range[node.FirstEdgeIndex, node.FirstEdgeIndex + node.EdgeCount). + /// + /// Each node also has an associated string. These strings are concatenated and stored in + /// _concatenatedLowerCaseWords. Each node has a TextSpan that indicates which portion of the character array + /// is their string. Note: i'd like to use an immutable array for the characters as well. However, we need to + /// create slices, and they need to work on top of an ArraySlice (which needs a char[]). The edit distance code + /// also wants to work on top of raw char[]s (both for speed, and so it can pool arrays to prevent lots of + /// garbage). Because of that we just keep this as a char[]. + /// private readonly char[] _concatenatedLowerCaseWords; private readonly ImmutableArray _nodes; private readonly ImmutableArray _edges; private BKTree(char[] concatenatedLowerCaseWords, ImmutableArray nodes, ImmutableArray edges) { + Contract.ThrowIfNull(concatenatedLowerCaseWords, nameof(_concatenatedLowerCaseWords)); + Contract.ThrowIfTrue(nodes.IsDefault, $"{nameof(nodes)}.{nameof(nodes.IsDefault)}"); + Contract.ThrowIfTrue(edges.IsDefault, $"{nameof(edges)}.{nameof(edges.IsDefault)}"); _concatenatedLowerCaseWords = concatenatedLowerCaseWords; _nodes = nodes; _edges = edges; } - public static BKTree Create(params string[] values) - => Create((IEnumerable)values); - public static BKTree Create(IEnumerable values) => new Builder(values).Create(); public void Find(ref TemporaryArray result, string value, int? threshold = null) { + Contract.ThrowIfNull(value, nameof(value)); + Contract.ThrowIfNull(_concatenatedLowerCaseWords, nameof(_concatenatedLowerCaseWords)); + Contract.ThrowIfTrue(_nodes.IsDefault, $"{nameof(_nodes)}.{nameof(_nodes.IsDefault)}"); + Contract.ThrowIfTrue(_edges.IsDefault, $"{nameof(_edges)}.{nameof(_edges.IsDefault)}"); if (_nodes.Length == 0) return;