From a5875a43fad11ebb0ffaafef97ae9a372e995845 Mon Sep 17 00:00:00 2001 From: "Harish S. Kulkarni" Date: Tue, 1 Dec 2020 10:40:46 -0800 Subject: [PATCH 1/6] Fixed memory leak from OnnxTransformer and related x86 build fixes --- Directory.Build.targets | 4 +- .../OnnxTransform.cs | 120 +++++++++++++----- src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs | 70 +++++----- src/Native/build.sh | 3 + test/Microsoft.ML.Tests/OnnxConversionTest.cs | 4 +- 5 files changed, 133 insertions(+), 68 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index e11804c23b..09b9747d35 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -12,7 +12,9 @@ x64 $(TargetArchitecture) $([MSBuild]::NormalizeDirectory('$(RepoRoot)', 'artifacts', 'bin')) - $(BinDir)Native\$(NativeTargetArchitecture).$(Configuration)\ + Debug + Release + $(BinDir)Native\$(NativeTargetArchitecture).$(NativeOutputConfig)\ AnyCPU $(Platform).$(Configuration) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index 878fabf6c5..38d8beb800 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -362,8 +362,12 @@ public void Dispose() _isDisposed = true; } - private sealed class Mapper : MapperBase + private sealed class Mapper : IRowMapper { + private readonly IHost _host; + private readonly DataViewSchema _inputSchema; + private readonly Lazy _outputColumns; + private readonly OnnxTransformer _parent; /// /// 's i-th element value tells the column index to @@ -379,9 +383,11 @@ private sealed class Mapper : MapperBase /// private readonly Type[] _inputOnnxTypes; - public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) : - base(Contracts.CheckRef(parent, nameof(parent)).Host.Register(nameof(Mapper)), inputSchema, parent) + public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) { + _host = Contracts.CheckRef(parent, nameof(parent)).Host.Register(nameof(Mapper)); + _inputSchema = inputSchema; + _outputColumns = new Lazy(GetOutputColumnsCore); _parent = parent; _inputColIndices = new int[_parent.Inputs.Length]; @@ -401,7 +407,7 @@ public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) : var col = inputSchema.GetColumnOrNull(_parent.Inputs[i]); if (!col.HasValue) - throw Host.ExceptSchemaMismatch(nameof(inputSchema),"input", _parent.Inputs[i]); + throw _host.ExceptSchemaMismatch(nameof(inputSchema),"input", _parent.Inputs[i]); _inputColIndices[i] = col.Value.Index; @@ -409,7 +415,7 @@ public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) : var vectorType = type as VectorDataViewType; if (vectorType != null && vectorType.Size == 0) - throw Host.Except($"Variable length input columns not supported"); + throw _host.Except($"Variable length input columns not supported"); var itemType = type.GetItemType(); var nodeItemType = inputNodeInfo.DataViewType.GetItemType(); @@ -421,7 +427,7 @@ public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) : // This is done to support a corner case originated in NimbusML. For more info, see: https://github.com/microsoft/NimbusML/issues/426 var isKeyType = itemType is KeyDataViewType; if (!isKeyType || itemType.RawType != nodeItemType.RawType) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", _parent.Inputs[i], inputNodeInfo.DataViewType.GetItemType().ToString(), type.ToString()); + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", _parent.Inputs[i], inputNodeInfo.DataViewType.GetItemType().ToString(), type.ToString()); } // If the column is one dimension we make sure that the total size of the Onnx shape matches. @@ -433,8 +439,9 @@ public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) : throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {String.Join(",", inputShape)}, but input data is of length {typeValueCount}."); } } + DataViewSchema.DetachedColumn[] IRowMapper.GetOutputColumns() => _outputColumns.Value; - protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore() + private DataViewSchema.DetachedColumn[] GetOutputColumnsCore() { var stdSuffix = ".output"; var info = new DataViewSchema.DetachedColumn[_parent.Outputs.Length]; @@ -476,17 +483,16 @@ private void AddSlotNames(string columnName, DataViewSchema.Annotations.Builder builder.AddSlotNames(count, getter); } - private protected override Func GetDependenciesCore(Func activeOutput) + private Func GetDependenciesCore(Func activeOutput) { return col => Enumerable.Range(0, _parent.Outputs.Length).Any(i => activeOutput(i)) && _inputColIndices.Any(i => i == col); } - private protected override void SaveModel(ModelSaveContext ctx) => _parent.SaveModel(ctx); + private void SaveModel(ModelSaveContext ctx) => _parent.SaveModel(ctx); - protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, out Action disposer) + private Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, OnnxRuntimeOutputCacher outputCacher) { - disposer = null; - Host.AssertValue(input); + _host.AssertValue(input); var activeOutputColNames = _parent.Outputs.Where((x, i) => activeOutput(i)).ToArray(); @@ -495,26 +501,65 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func, elemRawType, input, iinfo, srcNamedValueGetters, activeOutputColNames); + return Utils.MarshalInvoke(MakeTensorGetter, elemRawType, input, iinfo, srcNamedValueGetters, activeOutputColNames, outputCacher); } else { var type = _parent.Model.ModelInfo.OutputsInfo[_parent.MapDataViewColumnToOnnxOutputTensor(iinfo)].DataViewType.RawType; var srcNamedValueGetters = GetNamedOnnxValueGetters(input, _inputColIndices, _inputOnnxTypes, _inputTensorShapes); - return Utils.MarshalInvoke(MakeObjectGetter, type, input, iinfo, srcNamedValueGetters, activeOutputColNames); + return Utils.MarshalInvoke(MakeObjectGetter, type, input, iinfo, srcNamedValueGetters, activeOutputColNames, outputCacher); + } + } + + Delegate[] IRowMapper.CreateGetters(DataViewRow input, Func activeOutput, out Action disposer) + { + Contracts.Assert(input.Schema == _inputSchema); + + OnnxRuntimeOutputCacher outputCacher = new OnnxRuntimeOutputCacher(); + + int n = _outputColumns.Value.Length; + var result = new Delegate[n]; + for (int i = 0; i < n; i++) + { + if (!activeOutput(i)) + continue; + result[i] = MakeGetter(input, i, activeOutput, outputCacher); } + disposer = () => + { + outputCacher.Dispose(); + }; + return result; } - private class OnnxRuntimeOutputCacher + internal class OnnxRuntimeOutputCacher : IDisposable { public long Position; - public Dictionary Outputs; + public Dictionary Outputs; + public IDisposableReadOnlyCollection OutputOnnxValues; + public OnnxRuntimeOutputCacher() { Position = -1; - Outputs = new Dictionary(); + Outputs = new Dictionary(); + } + + private bool _isDisposed; + + protected virtual void Dispose(bool disposing) + { + if (_isDisposed) + return; + OutputOnnxValues?.Dispose(); + _isDisposed = true; + } + + public void Dispose() + { + Dispose(disposing: true); + GC.SuppressFinalize(this); } } @@ -529,10 +574,11 @@ private void UpdateCacheIfNeeded(long position, INamedOnnxValueGetter[] srcNamed inputNameOnnxValues.Add(srcNamedOnnxValueGetters[i].GetNamedOnnxValue()); } - var outputNamedOnnxValues = _parent.Model.Run(inputNameOnnxValues); - Contracts.Assert(outputNamedOnnxValues.Count > 0); + outputCache.OutputOnnxValues?.Dispose(); + outputCache.OutputOnnxValues = _parent.Model.Run(inputNameOnnxValues); + Contracts.Assert(outputCache.OutputOnnxValues.Count > 0); - foreach (var outputNameOnnxValue in outputNamedOnnxValues) + foreach (var outputNameOnnxValue in outputCache.OutputOnnxValues) { outputCache.Outputs[outputNameOnnxValue.Name] = outputNameOnnxValue; } @@ -540,17 +586,17 @@ private void UpdateCacheIfNeeded(long position, INamedOnnxValueGetter[] srcNamed } } - private Delegate MakeTensorGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames) + private Delegate MakeTensorGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, + string[] activeOutputColNames, OnnxRuntimeOutputCacher outputCacher) { - Host.AssertValue(input); - var outputCacher = new OnnxRuntimeOutputCacher(); + _host.AssertValue(input); ValueGetter> valueGetter = (ref VBuffer dst) => { UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCacher); var namedOnnxValue = outputCacher.Outputs[_parent.Outputs[iinfo]]; var tensor = namedOnnxValue.AsTensor() as Microsoft.ML.OnnxRuntime.Tensors.DenseTensor; if (tensor == null) - throw Host.Except($"Output column {namedOnnxValue.Name} doesn't contain a DenseTensor of expected type {typeof(T)}"); + throw _host.Except($"Output column {namedOnnxValue.Name} doesn't contain a DenseTensor of expected type {typeof(T)}"); var editor = VBufferEditor.Create(ref dst, (int)tensor.Length); tensor.Buffer.Span.CopyTo(editor.Values); dst = editor.Commit(); @@ -558,17 +604,17 @@ private Delegate MakeTensorGetter(DataViewRow input, int iinfo, INamedOnnxVal return valueGetter; } - private Delegate MakeStringTensorGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames) + private Delegate MakeStringTensorGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, + string[] activeOutputColNames, OnnxRuntimeOutputCacher outputCacher) { - Host.AssertValue(input); - var outputCacher = new OnnxRuntimeOutputCacher(); + _host.AssertValue(input); ValueGetter>> valueGetter = (ref VBuffer> dst) => { UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCacher); var namedOnnxValue = outputCacher.Outputs[_parent.Outputs[iinfo]]; var tensor = namedOnnxValue.AsTensor() as Microsoft.ML.OnnxRuntime.Tensors.DenseTensor; if (tensor == null) - throw Host.Except($"Output column {namedOnnxValue.Name} doesn't contain a DenseTensor of expected type {typeof(string)}"); + throw _host.Except($"Output column {namedOnnxValue.Name} doesn't contain a DenseTensor of expected type {typeof(string)}"); // Create VBufferEditor to fill "dst" with the values in "denseTensor". var editor = VBufferEditor.Create(ref dst, (int)tensor.Length); @@ -580,14 +626,14 @@ private Delegate MakeStringTensorGetter(DataViewRow input, int iinfo, INamedOnnx return valueGetter; } - private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames) + private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, + string[] activeOutputColNames, OnnxRuntimeOutputCacher outputCacher) { - Host.AssertValue(input); - var outputCache = new OnnxRuntimeOutputCacher(); + _host.AssertValue(input); ValueGetter valueGetter = (ref T dst) => { - UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCache); - var namedOnnxValue = outputCache.Outputs[_parent.Outputs[iinfo]]; + UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCacher); + var namedOnnxValue = outputCacher.Outputs[_parent.Outputs[iinfo]]; var trueValue = namedOnnxValue.AsEnumerable().Select(value => value.AsDictionary()); var caster = _parent.Model.ModelInfo.OutputsInfo[_parent.MapDataViewColumnToOnnxOutputTensor(iinfo)].Caster; dst = (T)caster(namedOnnxValue); @@ -664,6 +710,12 @@ private static INamedOnnxValueGetter CreateNamedOnnxValueGetterVecCore(DataVi return new NamedOnnxValueGetterVec(input, colIndex, onnxShape); } + void ICanSaveModel.Save(ModelSaveContext ctx) => SaveModel(ctx); + + Func IRowMapper.GetDependencies(Func activeOutput) => GetDependenciesCore(activeOutput); + + public ITransformer GetTransformer() => _parent; + /// /// Common function for wrapping ML.NET getter as a NamedOnnxValue getter. /// diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs b/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs index 02c24b1ad9..29c94087fc 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs @@ -198,40 +198,48 @@ public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = _session = new InferenceSession(modelFile); } - // Load ONNX model file and parse its input and output schema. The reason of doing so is that ONNXRuntime - // doesn't expose full type information via its C# APIs. - ModelFile = modelFile; - var model = new OnnxCSharpToProtoWrapper.ModelProto(); - using (var modelStream = File.OpenRead(modelFile)) - using (var codedStream = Google.Protobuf.CodedInputStream.CreateWithLimits(modelStream, Int32.MaxValue, 10)) - model = OnnxCSharpToProtoWrapper.ModelProto.Parser.ParseFrom(codedStream); - - // Parse actual input and output types stored in the loaded ONNX model to get their DataViewType's. - var inputTypePool = new Dictionary(); - foreach (var valueInfo in model.Graph.Input) - inputTypePool[valueInfo.Name] = OnnxTypeParser.GetDataViewType(valueInfo.Type); - - var initializerTypePool = new Dictionary(); - foreach (var valueInfo in model.Graph.Initializer) - initializerTypePool[valueInfo.Name] = OnnxTypeParser.GetScalarDataViewType(valueInfo.DataType); - - var outputTypePool = new Dictionary(); - // Build casters which maps NamedOnnxValue to .NET objects. - var casterPool = new Dictionary>(); - foreach (var valueInfo in model.Graph.Output) + try { - outputTypePool[valueInfo.Name] = OnnxTypeParser.GetDataViewType(valueInfo.Type); - casterPool[valueInfo.Name] = OnnxTypeParser.GetDataViewValueCasterAndResultedType(valueInfo.Type, out Type actualType); - } + // Load ONNX model file and parse its input and output schema. The reason of doing so is that ONNXRuntime + // doesn't expose full type information via its C# APIs. + ModelFile = modelFile; + var model = new OnnxCSharpToProtoWrapper.ModelProto(); + using (var modelStream = File.OpenRead(modelFile)) + using (var codedStream = Google.Protobuf.CodedInputStream.CreateWithLimits(modelStream, Int32.MaxValue, 10)) + model = OnnxCSharpToProtoWrapper.ModelProto.Parser.ParseFrom(codedStream); + + // Parse actual input and output types stored in the loaded ONNX model to get their DataViewType's. + var inputTypePool = new Dictionary(); + foreach (var valueInfo in model.Graph.Input) + inputTypePool[valueInfo.Name] = OnnxTypeParser.GetDataViewType(valueInfo.Type); + + var initializerTypePool = new Dictionary(); + foreach (var valueInfo in model.Graph.Initializer) + initializerTypePool[valueInfo.Name] = OnnxTypeParser.GetScalarDataViewType(valueInfo.DataType); + + var outputTypePool = new Dictionary(); + // Build casters which maps NamedOnnxValue to .NET objects. + var casterPool = new Dictionary>(); + foreach (var valueInfo in model.Graph.Output) + { + outputTypePool[valueInfo.Name] = OnnxTypeParser.GetDataViewType(valueInfo.Type); + casterPool[valueInfo.Name] = OnnxTypeParser.GetDataViewValueCasterAndResultedType(valueInfo.Type, out Type actualType); + } - var inputInfos = GetOnnxVariablesFromMetadata(_session.InputMetadata, shapeDictionary, inputTypePool, null); - var outputInfos = GetOnnxVariablesFromMetadata(_session.OutputMetadata, shapeDictionary, outputTypePool, casterPool); - var overrideableInitializers = GetOnnxVariablesFromMetadata(_session.OverridableInitializerMetadata, shapeDictionary, inputTypePool, null); + var inputInfos = GetOnnxVariablesFromMetadata(_session.InputMetadata, shapeDictionary, inputTypePool, null); + var outputInfos = GetOnnxVariablesFromMetadata(_session.OutputMetadata, shapeDictionary, outputTypePool, casterPool); + var overrideableInitializers = GetOnnxVariablesFromMetadata(_session.OverridableInitializerMetadata, shapeDictionary, inputTypePool, null); - // Create a view to the used ONNX model from ONNXRuntime's perspective. - ModelInfo = new OnnxModelInfo(inputInfos, outputInfos, overrideableInitializers); + // Create a view to the used ONNX model from ONNXRuntime's perspective. + ModelInfo = new OnnxModelInfo(inputInfos, outputInfos, overrideableInitializers); - Graph = model.Graph; + Graph = model.Graph; + } + catch (Exception) + { + _session.Dispose(); + throw; + } } private List GetOnnxVariablesFromMetadata(IReadOnlyDictionary nodeMetadata, @@ -350,7 +358,7 @@ public static OnnxModel CreateFromBytes(byte[] modelBytes, int? gpuDeviceId = nu /// /// The NamedOnnxValues to score. /// Resulting output NamedOnnxValues list. - public IReadOnlyCollection Run(List inputNamedOnnxValues) + public IDisposableReadOnlyCollection Run(List inputNamedOnnxValues) { return _session.Run(inputNamedOnnxValues); } diff --git a/src/Native/build.sh b/src/Native/build.sh index cbc7582ce1..394cc74e1a 100755 --- a/src/Native/build.sh +++ b/src/Native/build.sh @@ -63,6 +63,9 @@ while [ "$1" != "" ]; do shift done +# strip the -netcoreapp3_1 and -netfx suffixes from the configuration +__configuration=${__configuration/-netcoreapp3_1/} +__configuration=${__configuration/-netfx/} __cmake_defines="-DCMAKE_BUILD_TYPE=${__configuration} ${__strip_argument} -DMKL_LIB_PATH=${__mkllibpath} -DMKL_LIB_RPATH=${__mkllibrpath}" __IntermediatesDir="$__baseIntermediateOutputPath/Native/$__build_arch.$__configuration" diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index 9f97f21ad4..89b838c82e 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -788,7 +788,7 @@ public void RemoveVariablesInPipelineTest() .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numberOfLeaves: 2, numberOfTrees: 1, minimumExampleCountPerLeaf: 2)); - var model = pipeline.Fit(data); + using var model = pipeline.Fit(data); var transformedData = model.Transform(data); var onnxConversionContext = new OnnxContextImpl(mlContext, "A Simple Pipeline", "ML.NET", "0", 0, "machinelearning.dotnet", OnnxVersion.Stable); @@ -2029,7 +2029,7 @@ private void TestPipeline(EstimatorChain(EstimatorChain pipeline, IDataView dataView, string onnxFileName, ColumnComparison[] columnsToCompare, string onnxTxtName = null, string onnxTxtSubDir = null) where TLastTransformer : class, ITransformer { - var model = pipeline.Fit(dataView); + using var model = pipeline.Fit(dataView); var transformedData = model.Transform(dataView); var onnxModel = ML.Model.ConvertToOnnxProtobuf(model, dataView); From 07bc97b50c82b5afbcc6bbeb52ddbddb00906e71 Mon Sep 17 00:00:00 2001 From: "Harish S. Kulkarni" Date: Tue, 1 Dec 2020 11:08:53 -0800 Subject: [PATCH 2/6] Reverting x86 build related fixes to focus only on the memory leaks --- Directory.Build.targets | 4 +--- src/Native/build.sh | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 09b9747d35..e11804c23b 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -12,9 +12,7 @@ x64 $(TargetArchitecture) $([MSBuild]::NormalizeDirectory('$(RepoRoot)', 'artifacts', 'bin')) - Debug - Release - $(BinDir)Native\$(NativeTargetArchitecture).$(NativeOutputConfig)\ + $(BinDir)Native\$(NativeTargetArchitecture).$(Configuration)\ AnyCPU $(Platform).$(Configuration) diff --git a/src/Native/build.sh b/src/Native/build.sh index 394cc74e1a..cbc7582ce1 100755 --- a/src/Native/build.sh +++ b/src/Native/build.sh @@ -63,9 +63,6 @@ while [ "$1" != "" ]; do shift done -# strip the -netcoreapp3_1 and -netfx suffixes from the configuration -__configuration=${__configuration/-netcoreapp3_1/} -__configuration=${__configuration/-netfx/} __cmake_defines="-DCMAKE_BUILD_TYPE=${__configuration} ${__strip_argument} -DMKL_LIB_PATH=${__mkllibpath} -DMKL_LIB_RPATH=${__mkllibrpath}" __IntermediatesDir="$__baseIntermediateOutputPath/Native/$__build_arch.$__configuration" From db02163105ed24a9f8d0e5dba83b199e0ff55087 Mon Sep 17 00:00:00 2001 From: "Harish S. Kulkarni" Date: Tue, 1 Dec 2020 15:23:53 -0800 Subject: [PATCH 3/6] Updated docs --- .../Dynamic/ModelOperations/OnnxConversion.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/OnnxConversion.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/OnnxConversion.cs index 8a55b8fc64..2296e0cd29 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/OnnxConversion.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/OnnxConversion.cs @@ -82,7 +82,8 @@ public static void Example() //Create the pipeline using onnx file. var onnxModelPath = "your_path_to_sample_onnx_conversion_1.onnx"; var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(onnxModelPath); - var onnxTransformer = onnxEstimator.Fit(trainTestOriginalData.TrainSet); + //Make sure to either use the 'using' clause or explicitly dispose the returned onnxTransformer to prevent memory leaks + using var onnxTransformer = onnxEstimator.Fit(trainTestOriginalData.TrainSet); //Inference the testset var output = transformer.Transform(trainTestOriginalData.TestSet); From 4a903d263d23e63b2a7705a7a330e061adbf9f95 Mon Sep 17 00:00:00 2001 From: "Harish S. Kulkarni" Date: Tue, 1 Dec 2020 15:28:33 -0800 Subject: [PATCH 4/6] Reverted OnnxRuntimeOutputCatcher to private class --- src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index 38d8beb800..64bd5a4b46 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -534,7 +534,7 @@ Delegate[] IRowMapper.CreateGetters(DataViewRow input, Func activeOut return result; } - internal class OnnxRuntimeOutputCacher : IDisposable + private class OnnxRuntimeOutputCacher : IDisposable { public long Position; public Dictionary Outputs; From 645a47fe94a19d895c6a384f6c0ffc6dd40c933c Mon Sep 17 00:00:00 2001 From: "Harish S. Kulkarni" Date: Tue, 1 Dec 2020 17:15:51 -0800 Subject: [PATCH 5/6] Addressed code review comments --- src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs | 10 ++-------- src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs | 3 ++- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index 64bd5a4b46..430bf6728e 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -534,7 +534,7 @@ Delegate[] IRowMapper.CreateGetters(DataViewRow input, Func activeOut return result; } - private class OnnxRuntimeOutputCacher : IDisposable + private sealed class OnnxRuntimeOutputCacher : IDisposable { public long Position; public Dictionary Outputs; @@ -548,19 +548,13 @@ public OnnxRuntimeOutputCacher() private bool _isDisposed; - protected virtual void Dispose(bool disposing) + public void Dispose() { if (_isDisposed) return; OutputOnnxValues?.Dispose(); _isDisposed = true; } - - public void Dispose() - { - Dispose(disposing: true); - GC.SuppressFinalize(this); - } } private void UpdateCacheIfNeeded(long position, INamedOnnxValueGetter[] srcNamedOnnxValueGetters, string[] activeOutputColNames, OnnxRuntimeOutputCacher outputCache) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs b/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs index 29c94087fc..4adf18fa40 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs @@ -235,9 +235,10 @@ public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = Graph = model.Graph; } - catch (Exception) + catch { _session.Dispose(); + _session = null; throw; } } From e14eafc35f4a6a34c27b0bec778107894241025b Mon Sep 17 00:00:00 2001 From: "Harish S. Kulkarni" Date: Tue, 1 Dec 2020 18:42:56 -0800 Subject: [PATCH 6/6] Refactored OnnxTransform back to using MapperBase based on code review comments --- .../Transforms/RowToRowTransformerBase.cs | 10 ++-- .../OnnxTransform.cs | 58 ++++++++----------- 2 files changed, 30 insertions(+), 38 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/RowToRowTransformerBase.cs b/src/Microsoft.ML.Data/Transforms/RowToRowTransformerBase.cs index baf3400eaf..5a6b15cf69 100644 --- a/src/Microsoft.ML.Data/Transforms/RowToRowTransformerBase.cs +++ b/src/Microsoft.ML.Data/Transforms/RowToRowTransformerBase.cs @@ -58,7 +58,7 @@ private protected abstract class MapperBase : IRowMapper { protected readonly IHost Host; protected readonly DataViewSchema InputSchema; - private readonly Lazy _outputColumns; + protected readonly Lazy OutputColumns; private readonly RowToRowTransformerBase _parent; protected MapperBase(IHost host, DataViewSchema inputSchema, RowToRowTransformerBase parent) @@ -68,21 +68,21 @@ protected MapperBase(IHost host, DataViewSchema inputSchema, RowToRowTransformer Host = host; InputSchema = inputSchema; _parent = parent; - _outputColumns = new Lazy(GetOutputColumnsCore); + OutputColumns = new Lazy(GetOutputColumnsCore); } protected abstract DataViewSchema.DetachedColumn[] GetOutputColumnsCore(); - DataViewSchema.DetachedColumn[] IRowMapper.GetOutputColumns() => _outputColumns.Value; + DataViewSchema.DetachedColumn[] IRowMapper.GetOutputColumns() => OutputColumns.Value; - Delegate[] IRowMapper.CreateGetters(DataViewRow input, Func activeOutput, out Action disposer) + public virtual Delegate[] CreateGetters(DataViewRow input, Func activeOutput, out Action disposer) { // REVIEW: it used to be that the mapper's input schema in the constructor was required to be reference-equal to the schema // of the input row. // It still has to be the same schema, but because we may make a transition from lazy to eager schema, the reference-equality // is no longer always possible. So, we relax the assert as below. Contracts.Assert(input.Schema == InputSchema); - int n = _outputColumns.Value.Length; + int n = OutputColumns.Value.Length; var result = new Delegate[n]; var disposers = new Action[n]; for (int i = 0; i < n; i++) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index 430bf6728e..9932019e7a 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -362,12 +362,8 @@ public void Dispose() _isDisposed = true; } - private sealed class Mapper : IRowMapper + private sealed class Mapper : MapperBase { - private readonly IHost _host; - private readonly DataViewSchema _inputSchema; - private readonly Lazy _outputColumns; - private readonly OnnxTransformer _parent; /// /// 's i-th element value tells the column index to @@ -383,11 +379,9 @@ private sealed class Mapper : IRowMapper /// private readonly Type[] _inputOnnxTypes; - public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) + public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) : + base(Contracts.CheckRef(parent, nameof(parent)).Host.Register(nameof(Mapper)), inputSchema, parent) { - _host = Contracts.CheckRef(parent, nameof(parent)).Host.Register(nameof(Mapper)); - _inputSchema = inputSchema; - _outputColumns = new Lazy(GetOutputColumnsCore); _parent = parent; _inputColIndices = new int[_parent.Inputs.Length]; @@ -407,7 +401,7 @@ public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) var col = inputSchema.GetColumnOrNull(_parent.Inputs[i]); if (!col.HasValue) - throw _host.ExceptSchemaMismatch(nameof(inputSchema),"input", _parent.Inputs[i]); + throw Host.ExceptSchemaMismatch(nameof(inputSchema),"input", _parent.Inputs[i]); _inputColIndices[i] = col.Value.Index; @@ -415,7 +409,7 @@ public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) var vectorType = type as VectorDataViewType; if (vectorType != null && vectorType.Size == 0) - throw _host.Except($"Variable length input columns not supported"); + throw Host.Except($"Variable length input columns not supported"); var itemType = type.GetItemType(); var nodeItemType = inputNodeInfo.DataViewType.GetItemType(); @@ -427,7 +421,7 @@ public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) // This is done to support a corner case originated in NimbusML. For more info, see: https://github.com/microsoft/NimbusML/issues/426 var isKeyType = itemType is KeyDataViewType; if (!isKeyType || itemType.RawType != nodeItemType.RawType) - throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", _parent.Inputs[i], inputNodeInfo.DataViewType.GetItemType().ToString(), type.ToString()); + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", _parent.Inputs[i], inputNodeInfo.DataViewType.GetItemType().ToString(), type.ToString()); } // If the column is one dimension we make sure that the total size of the Onnx shape matches. @@ -439,9 +433,8 @@ public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {String.Join(",", inputShape)}, but input data is of length {typeValueCount}."); } } - DataViewSchema.DetachedColumn[] IRowMapper.GetOutputColumns() => _outputColumns.Value; - private DataViewSchema.DetachedColumn[] GetOutputColumnsCore() + protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore() { var stdSuffix = ".output"; var info = new DataViewSchema.DetachedColumn[_parent.Outputs.Length]; @@ -483,16 +476,19 @@ private void AddSlotNames(string columnName, DataViewSchema.Annotations.Builder builder.AddSlotNames(count, getter); } - private Func GetDependenciesCore(Func activeOutput) + private protected override Func GetDependenciesCore(Func activeOutput) { return col => Enumerable.Range(0, _parent.Outputs.Length).Any(i => activeOutput(i)) && _inputColIndices.Any(i => i == col); } - private void SaveModel(ModelSaveContext ctx) => _parent.SaveModel(ctx); + private protected override void SaveModel(ModelSaveContext ctx) => _parent.SaveModel(ctx); + + protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, out Action disposer) + => throw new NotImplementedException("This should never be called!"); - private Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, OnnxRuntimeOutputCacher outputCacher) + private Delegate CreateGetter(DataViewRow input, int iinfo, Func activeOutput, OnnxRuntimeOutputCacher outputCacher) { - _host.AssertValue(input); + Host.AssertValue(input); var activeOutputColNames = _parent.Outputs.Where((x, i) => activeOutput(i)).ToArray(); @@ -513,19 +509,19 @@ private Delegate MakeGetter(DataViewRow input, int iinfo, Func active } } - Delegate[] IRowMapper.CreateGetters(DataViewRow input, Func activeOutput, out Action disposer) + public override Delegate[] CreateGetters(DataViewRow input, Func activeOutput, out Action disposer) { - Contracts.Assert(input.Schema == _inputSchema); + Contracts.Assert(input.Schema == InputSchema); OnnxRuntimeOutputCacher outputCacher = new OnnxRuntimeOutputCacher(); - int n = _outputColumns.Value.Length; + int n = OutputColumns.Value.Length; var result = new Delegate[n]; for (int i = 0; i < n; i++) { if (!activeOutput(i)) continue; - result[i] = MakeGetter(input, i, activeOutput, outputCacher); + result[i] = CreateGetter(input, i, activeOutput, outputCacher); } disposer = () => { @@ -583,14 +579,14 @@ private void UpdateCacheIfNeeded(long position, INamedOnnxValueGetter[] srcNamed private Delegate MakeTensorGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames, OnnxRuntimeOutputCacher outputCacher) { - _host.AssertValue(input); + Host.AssertValue(input); ValueGetter> valueGetter = (ref VBuffer dst) => { UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCacher); var namedOnnxValue = outputCacher.Outputs[_parent.Outputs[iinfo]]; var tensor = namedOnnxValue.AsTensor() as Microsoft.ML.OnnxRuntime.Tensors.DenseTensor; if (tensor == null) - throw _host.Except($"Output column {namedOnnxValue.Name} doesn't contain a DenseTensor of expected type {typeof(T)}"); + throw Host.Except($"Output column {namedOnnxValue.Name} doesn't contain a DenseTensor of expected type {typeof(T)}"); var editor = VBufferEditor.Create(ref dst, (int)tensor.Length); tensor.Buffer.Span.CopyTo(editor.Values); dst = editor.Commit(); @@ -601,14 +597,15 @@ private Delegate MakeTensorGetter(DataViewRow input, int iinfo, INamedOnnxVal private Delegate MakeStringTensorGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames, OnnxRuntimeOutputCacher outputCacher) { - _host.AssertValue(input); + Host.AssertValue(input); + ValueGetter>> valueGetter = (ref VBuffer> dst) => { UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCacher); var namedOnnxValue = outputCacher.Outputs[_parent.Outputs[iinfo]]; var tensor = namedOnnxValue.AsTensor() as Microsoft.ML.OnnxRuntime.Tensors.DenseTensor; if (tensor == null) - throw _host.Except($"Output column {namedOnnxValue.Name} doesn't contain a DenseTensor of expected type {typeof(string)}"); + throw Host.Except($"Output column {namedOnnxValue.Name} doesn't contain a DenseTensor of expected type {typeof(string)}"); // Create VBufferEditor to fill "dst" with the values in "denseTensor". var editor = VBufferEditor.Create(ref dst, (int)tensor.Length); @@ -623,7 +620,8 @@ private Delegate MakeStringTensorGetter(DataViewRow input, int iinfo, INamedOnnx private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames, OnnxRuntimeOutputCacher outputCacher) { - _host.AssertValue(input); + Host.AssertValue(input); + ValueGetter valueGetter = (ref T dst) => { UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCacher); @@ -704,12 +702,6 @@ private static INamedOnnxValueGetter CreateNamedOnnxValueGetterVecCore(DataVi return new NamedOnnxValueGetterVec(input, colIndex, onnxShape); } - void ICanSaveModel.Save(ModelSaveContext ctx) => SaveModel(ctx); - - Func IRowMapper.GetDependencies(Func activeOutput) => GetDependenciesCore(activeOutput); - - public ITransformer GetTransformer() => _parent; - /// /// Common function for wrapping ML.NET getter as a NamedOnnxValue getter. ///