Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make array values intended to be immutable IReadOnlyList #2878

Merged
merged 1 commit into from
Mar 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/Microsoft.ML.Core/Data/IProgressChannel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;

namespace Microsoft.ML
{
Expand Down Expand Up @@ -77,13 +78,13 @@ public sealed class ProgressHeader
/// For example, neural network might have {'epoch', 'example'} and FastTree might have {'tree', 'split', 'feature'}.
/// Will never be null, but can be empty.
/// </summary>
public readonly string[] UnitNames;
public readonly IReadOnlyList<string> UnitNames;

/// <summary>
/// These are the names of the reported metrics. For example, this could be the 'loss', 'weight updates/sec' etc.
/// Will never be null, but can be empty.
/// </summary>
public readonly string[] MetricNames;
public readonly IReadOnlyList<string> MetricNames;

/// <summary>
/// Initialize the header. This will take ownership of the arrays.
Expand Down
16 changes: 7 additions & 9 deletions src/Microsoft.ML.Core/Data/ProgressReporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ public sealed class ProgressChannel : IProgressChannel
{
private readonly IExceptionContext _ectx;

private readonly string _name;

/// <summary>
/// The pair of (header, fill action) is updated atomically.
/// </summary>
Expand All @@ -41,7 +39,7 @@ public sealed class ProgressChannel : IProgressChannel
private volatile int _maxSubId;
private bool _isDisposed;

public string Name { get { return _name; } }
public string Name { get; }

/// <summary>
/// Initialize a <see cref="ProgressChannel"/> for the process identified by <paramref name="computationName"/>.
Expand All @@ -56,7 +54,7 @@ public ProgressChannel(IExceptionContext ectx, ProgressTracker tracker, string c
_ectx.CheckValue(tracker, nameof(tracker));
_ectx.CheckNonEmpty(computationName, nameof(computationName));

_name = computationName;
Name = computationName;
_tracker = tracker;
_subChannels = new ConcurrentDictionary<int, SubChannel>();
_maxSubId = 0;
Expand Down Expand Up @@ -132,7 +130,7 @@ public ProgressEntry GetProgress()
var entry = new ProgressEntry(false, cache.Item1);

if (fillAction == null)
Contracts.Assert(entry.Header.MetricNames.Length == 0 && entry.Header.UnitNames.Length == 0);
Contracts.Assert(entry.Header.MetricNames.Count == 0 && entry.Header.UnitNames.Count == 0);
else
fillAction(entry);

Expand Down Expand Up @@ -232,7 +230,7 @@ public ProgressEntry GetProgress()
var entry = new ProgressEntry(false, cache.Item1);

if (fillAction == null)
Contracts.Assert(entry.Header.MetricNames.Length == 0 && entry.Header.UnitNames.Length == 0);
Contracts.Assert(entry.Header.MetricNames.Count == 0 && entry.Header.UnitNames.Count == 0);
else
fillAction(entry);
return entry;
Expand Down Expand Up @@ -558,9 +556,9 @@ public ProgressEntry(bool isCheckpoint, ProgressHeader header)
Contracts.CheckValue(header, nameof(header));
Header = header;
IsCheckpoint = isCheckpoint;
Progress = new Double?[header.UnitNames.Length];
ProgressLim = new Double?[header.UnitNames.Length];
Metrics = new Double?[header.MetricNames.Length];
Progress = new Double?[header.UnitNames.Count];
ProgressLim = new Double?[header.UnitNames.Count];
Metrics = new Double?[header.MetricNames.Count];
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Core/Environment/ConsoleEnvironment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEven

// Progress units.
bool first = true;
for (int i = 0; i < ev.ProgressEntry.Header.UnitNames.Length; i++)
for (int i = 0; i < ev.ProgressEntry.Header.UnitNames.Count; i++)
{
if (ev.ProgressEntry.Progress[i] == null)
continue;
Expand All @@ -272,7 +272,7 @@ private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEven
}

// Metrics.
for (int i = 0; i < ev.ProgressEntry.Header.MetricNames.Length; i++)
for (int i = 0; i < ev.ProgressEntry.Header.MetricNames.Count; i++)
{
if (ev.ProgressEntry.Metrics[i] == null)
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using System.Collections.Immutable;
using Microsoft.Data.DataView;

namespace Microsoft.ML.Data
Expand Down Expand Up @@ -78,7 +80,7 @@ public sealed class MultiClassClassifierMetrics
/// p[i] is the probability returned by the classifier if the instance belongs to the class,
/// and 1 minus the probability returned by the classifier if the instance does not belong to the class.
/// </remarks>
public double[] PerClassLogLoss { get; }
public IReadOnlyList<double> PerClassLogLoss { get; }

internal MultiClassClassifierMetrics(IExceptionContext ectx, DataViewRow overallResult, int topK)
{
Expand All @@ -92,8 +94,7 @@ internal MultiClassClassifierMetrics(IExceptionContext ectx, DataViewRow overall
TopKAccuracy = FetchDouble(MultiClassClassifierEvaluator.TopKAccuracy);

var perClassLogLoss = RowCursorUtils.Fetch<VBuffer<double>>(ectx, overallResult, MultiClassClassifierEvaluator.PerClassLogLoss);
PerClassLogLoss = new double[perClassLogLoss.Length];
perClassLogLoss.CopyTo(PerClassLogLoss);
PerClassLogLoss = perClassLogLoss.DenseValues().ToImmutableArray();
}

internal MultiClassClassifierMetrics(double accuracyMicro, double accuracyMacro, double logLoss, double logLossReduction,
Expand All @@ -105,8 +106,7 @@ internal MultiClassClassifierMetrics(double accuracyMicro, double accuracyMacro,
LogLossReduction = logLossReduction;
TopK = topK;
TopKAccuracy = topKAccuracy;
PerClassLogLoss = new double[perClassLogLoss.Length];
perClassLogLoss.CopyTo(PerClassLogLoss, 0);
PerClassLogLoss = perClassLogLoss.ToImmutableArray();
}
}
}
16 changes: 8 additions & 8 deletions src/Microsoft.ML.Data/Evaluators/Metrics/RankingMetrics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using System.Collections.Immutable;
using Microsoft.Data.DataView;

namespace Microsoft.ML.Data
Expand All @@ -15,7 +17,7 @@ public sealed class RankingMetrics
/// Array of normalized discounted cumulative gains where i-th element represent NDCG@i.
/// <image src="https://github.com/dotnet/machinelearning/tree/master/docs/images/NDCG.png"></image>
/// </summary>
public double[] NormalizedDiscountedCumulativeGains { get; }
public IReadOnlyList<double> NormalizedDiscountedCumulativeGains { get; }

/// <summary>
/// Array of discounted cumulative gains where i-th element represent DCG@i.
Expand All @@ -25,7 +27,7 @@ public sealed class RankingMetrics
/// <image src="https://github.com/dotnet/machinelearning/tree/master/docs/images/DCG.png"></image>
/// </summary>
/// <remarks><a href="https://en.wikipedia.org/wiki/Discounted_cumulative_gain">Discounted Cumulative gain.</a></remarks>
public double[] DiscountedCumulativeGains { get; }
public IReadOnlyList<double> DiscountedCumulativeGains { get; }

private static T Fetch<T>(IExceptionContext ectx, DataViewRow row, string name)
{
Expand All @@ -40,16 +42,14 @@ internal RankingMetrics(IExceptionContext ectx, DataViewRow overallResult)
{
VBuffer<double> Fetch(string name) => Fetch<VBuffer<double>>(ectx, overallResult, name);

DiscountedCumulativeGains = Fetch(RankingEvaluator.Dcg).GetValues().ToArray();
NormalizedDiscountedCumulativeGains = Fetch(RankingEvaluator.Ndcg).GetValues().ToArray();
DiscountedCumulativeGains = Fetch(RankingEvaluator.Dcg).DenseValues().ToImmutableArray();
NormalizedDiscountedCumulativeGains = Fetch(RankingEvaluator.Ndcg).DenseValues().ToImmutableArray();
}

internal RankingMetrics(double[] dcg, double[] ndcg)
{
DiscountedCumulativeGains = new double[dcg.Length];
dcg.CopyTo(DiscountedCumulativeGains, 0);
NormalizedDiscountedCumulativeGains = new double[ndcg.Length];
ndcg.CopyTo(NormalizedDiscountedCumulativeGains, 0);
DiscountedCumulativeGains = dcg.ToImmutableArray();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to do a copy here? Would it be terrible if we just set the field to an array?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm honestly not certain, but as I indicated in the description I think it's better to be more concerned at this moment with getting the right public surface. If it turns out that this is an area where we can do an optimization to avoid a copy later, I'm all for it, but I doubt given the nature of this code that a copy will really wind up making a big difference here (as opposed to, say, something in the actual data pipeline).

NormalizedDiscountedCumulativeGains = ndcg.ToImmutableArray();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using System.Linq;
using Microsoft.Data.DataView;
using Microsoft.ML.Data;
Expand Down Expand Up @@ -38,7 +39,8 @@ public abstract class ColumnOptionsBase
public readonly string InputColumnName;
public readonly SortOrder Sort;
public readonly int MaxNumKeys;
public readonly string[] Term;
public IReadOnlyList<string> Term => TermArray;
internal readonly string[] TermArray;
public readonly bool TextKeyValues;

[BestFriend]
Expand All @@ -53,7 +55,7 @@ private protected ColumnOptionsBase(string outputColumnName, string inputColumnN
InputColumnName = inputColumnName ?? outputColumnName;
Sort = sort;
MaxNumKeys = maxNumKeys;
Term = term;
TermArray = term;
TextKeyValues = textKeyValues;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] info
{
// First check whether we have a terms argument, and handle it appropriately.
var terms = columns[iinfo].Terms.AsMemory();
var termsArray = columns[iinfo].Term;
var termsArray = columns[iinfo].TermArray;

terms = ReadOnlyMemoryUtils.TrimSpaces(terms);
if (!terms.IsEmpty || (termsArray != null && termsArray.Length > 0))
Expand Down
25 changes: 13 additions & 12 deletions src/Microsoft.ML.Transforms/MetricStatistics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using Microsoft.ML.Internal.Utilities;

namespace Microsoft.ML.Data
Expand Down Expand Up @@ -51,11 +52,11 @@ internal void Add(double metric)

internal static class MetricsStatisticsUtils
{
public static void AddArray(double[] src, MetricStatistics[] dest)
public static void AddToEach(IReadOnlyList<double> src, IReadOnlyList<MetricStatistics> dest)
{
Contracts.Assert(src.Length == dest.Length, "Array sizes do not match.");
Contracts.Assert(src.Count == dest.Count);

for (int i = 0; i < dest.Length; i++)
for (int i = 0; i < dest.Count; i++)
dest[i].Add(src[i]);
}

Expand Down Expand Up @@ -242,7 +243,7 @@ public sealed class MultiClassClassifierMetricsStatistics : IMetricsStatistics<M
/// <summary>
/// Summary statistics for <see cref="MultiClassClassifierMetrics.PerClassLogLoss"/>.
/// </summary>
public MetricStatistics[] PerClassLogLoss { get; private set; }
public IReadOnlyList<MetricStatistics> PerClassLogLoss { get; private set; }

internal MultiClassClassifierMetricsStatistics()
{
Expand All @@ -266,8 +267,8 @@ void IMetricsStatistics<MultiClassClassifierMetrics>.Add(MultiClassClassifierMet
TopKAccuracy.Add(metrics.TopKAccuracy);

if (PerClassLogLoss == null)
PerClassLogLoss = MetricsStatisticsUtils.InitializeArray(metrics.PerClassLogLoss.Length);
MetricsStatisticsUtils.AddArray(metrics.PerClassLogLoss, PerClassLogLoss);
PerClassLogLoss = MetricsStatisticsUtils.InitializeArray(metrics.PerClassLogLoss.Count);
MetricsStatisticsUtils.AddToEach(metrics.PerClassLogLoss, PerClassLogLoss);
}
}

Expand All @@ -280,12 +281,12 @@ public sealed class RankingMetricsStatistics : IMetricsStatistics<RankingMetrics
/// <summary>
/// Summary statistics for <see cref="RankingMetrics.DiscountedCumulativeGains"/>.
/// </summary>
public MetricStatistics[] DiscountedCumulativeGains { get; private set; }
public IReadOnlyList<MetricStatistics> DiscountedCumulativeGains { get; private set; }

/// <summary>
/// Summary statistics for <see cref="RankingMetrics.NormalizedDiscountedCumulativeGains"/>.
/// </summary>
public MetricStatistics[] NormalizedDiscountedCumulativeGains { get; private set; }
public IReadOnlyList<MetricStatistics> NormalizedDiscountedCumulativeGains { get; private set; }

internal RankingMetricsStatistics()
{
Expand All @@ -298,13 +299,13 @@ internal RankingMetricsStatistics()
void IMetricsStatistics<RankingMetrics>.Add(RankingMetrics metrics)
{
if (DiscountedCumulativeGains == null)
DiscountedCumulativeGains = MetricsStatisticsUtils.InitializeArray(metrics.DiscountedCumulativeGains.Length);
DiscountedCumulativeGains = MetricsStatisticsUtils.InitializeArray(metrics.DiscountedCumulativeGains.Count);

if (NormalizedDiscountedCumulativeGains == null)
NormalizedDiscountedCumulativeGains = MetricsStatisticsUtils.InitializeArray(metrics.NormalizedDiscountedCumulativeGains.Length);
NormalizedDiscountedCumulativeGains = MetricsStatisticsUtils.InitializeArray(metrics.NormalizedDiscountedCumulativeGains.Count);

MetricsStatisticsUtils.AddArray(metrics.DiscountedCumulativeGains, DiscountedCumulativeGains);
MetricsStatisticsUtils.AddArray(metrics.NormalizedDiscountedCumulativeGains, NormalizedDiscountedCumulativeGains);
MetricsStatisticsUtils.AddToEach(metrics.DiscountedCumulativeGains, DiscountedCumulativeGains);
MetricsStatisticsUtils.AddToEach(metrics.NormalizedDiscountedCumulativeGains, NormalizedDiscountedCumulativeGains);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using System.Collections.Immutable;
using Microsoft.Data.DataView;
using Microsoft.ML.Data;
Expand Down Expand Up @@ -236,7 +237,7 @@ private static MultiClassClassifierMetrics MulticlassClassificationDelta(
if (a.TopK != b.TopK)
Contracts.Assert(a.TopK == b.TopK, "TopK to compare must be the same length.");

var perClassLogLoss = ComputeArrayDeltas(a.PerClassLogLoss, b.PerClassLogLoss);
var perClassLogLoss = ComputeSequenceDeltas(a.PerClassLogLoss, b.PerClassLogLoss);

return new MultiClassClassifierMetrics(
accuracyMicro: a.MicroAccuracy - b.MicroAccuracy,
Expand Down Expand Up @@ -315,8 +316,8 @@ public static ImmutableArray<RankingMetricsStatistics>
private static RankingMetrics RankingDelta(
RankingMetrics a, RankingMetrics b)
{
var dcg = ComputeArrayDeltas(a.DiscountedCumulativeGains, b.DiscountedCumulativeGains);
var ndcg = ComputeArrayDeltas(a.NormalizedDiscountedCumulativeGains, b.NormalizedDiscountedCumulativeGains);
var dcg = ComputeSequenceDeltas(a.DiscountedCumulativeGains, b.DiscountedCumulativeGains);
var ndcg = ComputeSequenceDeltas(a.NormalizedDiscountedCumulativeGains, b.NormalizedDiscountedCumulativeGains);

return new RankingMetrics(dcg: dcg, ndcg: ndcg);
}
Expand All @@ -325,12 +326,12 @@ private static RankingMetrics RankingDelta(

#region Helpers

private static double[] ComputeArrayDeltas(double[] a, double[] b)
private static double[] ComputeSequenceDeltas(IReadOnlyList<double> a, IReadOnlyList<double> b)
{
Contracts.Assert(a.Length == b.Length, "Arrays to compare must be of the same length.");
Contracts.Assert(a.Count == b.Count);

var delta = new double[a.Length];
for (int i = 0; i < a.Length; i++)
var delta = new double[a.Count];
for (int i = 0; i < a.Count; i++)
delta[i] = a[i] - b[i];
return delta;
}
Expand Down
Loading