From e464102dbd452823137e6a24b4594d4a5ae7c9aa Mon Sep 17 00:00:00 2001 From: Michael Sharp Date: Tue, 2 Mar 2021 16:32:55 -0800 Subject: [PATCH] fixes based on PR comments --- src/Microsoft.ML.TensorFlow/TensorflowTransform.cs | 9 +++++---- src/Microsoft.ML.TensorFlow/TensorflowUtils.cs | 6 +++--- .../BaselineOutput/Common/EntryPoints/core_manifest.json | 2 +- test/Microsoft.ML.Tests/TensorFlowEstimatorTests.cs | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index b92ee04909..1537ab10cf 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -110,8 +110,9 @@ internal TensorFlowTransformer(IHostEnvironment env, TensorFlowModel tfModelInfo /// The output columns to generate. Names must match model specifications. Data types are inferred from model. /// Add a batch dimension to the input e.g. input = [224, 224, 3] => [-1, 224, 224, 3]. /// This parameter is used to deal with models that have unknown shape but the internal operators in the model require data to have batch dimension as well. - internal TensorFlowTransformer(IHostEnvironment env, TensorFlowModel tfModelInfo, string[] outputColumnNames, string[] inputColumnNames, bool addBatchDimensionInput = false) - : this(env, tfModelInfo.Session, outputColumnNames, inputColumnNames, IsSavedModel(env, tfModelInfo.ModelPath) ? tfModelInfo.ModelPath : null, false, addBatchDimensionInput) + /// If the first dimension of the output is unknown, should it be treated as batched or not. + internal TensorFlowTransformer(IHostEnvironment env, TensorFlowModel tfModelInfo, string[] outputColumnNames, string[] inputColumnNames, bool addBatchDimensionInput = false, bool treatOutputAsBatched = true) + : this(env, tfModelInfo.Session, outputColumnNames, inputColumnNames, IsSavedModel(env, tfModelInfo.ModelPath) ? tfModelInfo.ModelPath : null, false, addBatchDimensionInput, treatOutputAsBatched: treatOutputAsBatched) { } @@ -898,9 +899,9 @@ internal sealed class Options : TransformInputBase /// If the first dimension of the output is unknown, should it be treated as batched or not. e.g. output = [-1] will be read as a vector of unknown length when this is false. /// /// - /// This parameter is used to deal with models that have unknown output shape and it needs to be interpreted in ML.NET as a vector of unkown length and not as a batch dimension. + /// This parameter is used to deal with models that have unknown output shape and it needs to be interpreted in ML.NET as a vector of unknown length and not as a batch dimension. /// - [Argument(ArgumentType.AtMostOnce, HelpText = "If the first dimension of the output is unknown, should it be treated as batched or not. e.g. output = [-1] will be read as a vector of unkown length when this is false.", SortOrder = 17)] + [Argument(ArgumentType.AtMostOnce, HelpText = "If the first dimension of the output is unknown, should it be treated as batched or not. e.g. output = [-1] will be read as a vector of unknown length when this is false.", SortOrder = 17)] public bool TreatOutputAsBatched = true; } diff --git a/src/Microsoft.ML.TensorFlow/TensorflowUtils.cs b/src/Microsoft.ML.TensorFlow/TensorflowUtils.cs index 39141d77d2..8fbbd772a0 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowUtils.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowUtils.cs @@ -99,9 +99,9 @@ internal static DataViewSchema GetModelSchema(IExceptionContext ectx, Graph grap columnType = new VectorDataViewType(mlType, tensorShape[0] > 0 ? tensorShape : tensorShape.Skip(1).ToArray()); } // When treatOutputAsBatched is false, if the first value is less than 0 we want to set it to 0. TensorFlow - // represents an unkown size as -1, but ML.NET represents it as 0 so we need to convert it. - // I.E. if the input dimensions are [-1, 5], ML.NET will read the -1 as a dimension of unkown length, and so the ML.NET - // data type will be a vector of 2 dimensions, where the first dimension is unkown and the second has a length of 5. + // represents an unknown size as -1, but ML.NET represents it as 0 so we need to convert it. + // I.E. if the input dimensions are [-1, 5], ML.NET will read the -1 as a dimension of unknown length, and so the ML.NET + // data type will be a vector of 2 dimensions, where the first dimension is unknown and the second has a length of 5. else { if (tensorShape[0] < 0) diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index b3445a4149..7253e4533f 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -23617,7 +23617,7 @@ { "Name": "TreatOutputAsBatched", "Type": "Bool", - "Desc": "If the first dimension of the output is unknown, should it be treated as batched or not. e.g. output = [-1] will be read as a vector of unkown length when this is false.", + "Desc": "If the first dimension of the output is unknown, should it be treated as batched or not. e.g. output = [-1] will be read as a vector of unknown length when this is false.", "Required": false, "SortOrder": 17.0, "IsNullable": false, diff --git a/test/Microsoft.ML.Tests/TensorFlowEstimatorTests.cs b/test/Microsoft.ML.Tests/TensorFlowEstimatorTests.cs index 6d1ab87e5d..ff6dbd456f 100644 --- a/test/Microsoft.ML.Tests/TensorFlowEstimatorTests.cs +++ b/test/Microsoft.ML.Tests/TensorFlowEstimatorTests.cs @@ -183,7 +183,7 @@ public void TestTensorFlow() Assert.Equal(4, numRows); } } - + [TensorFlowFact] public void TreatOutputAsBatched() { @@ -211,7 +211,7 @@ public void TreatOutputAsBatched() var schema = pipe.Fit(data).Transform(data).Schema; // The dimensions of the output with treatOutputAsBatched set to false should be * 10 - // as the first dimension of -1 is treated as an unkown dimension. + // as the first dimension of -1 is treated as an unknown dimension. Assert.Equal(new VectorDataViewType(NumberDataViewType.Single, 0, 10), schema["Output"].Type); // Note that CamelCase column names are there to match the TF graph node names.