From 834e4715afa070f5e5c3628693682943b3f0bd6d Mon Sep 17 00:00:00 2001 From: Abhishek Goswami Date: Thu, 7 Feb 2019 22:41:00 +0000 Subject: [PATCH] Creation of components through MLContext and cleanup (text transform) (#2394) * text transform * review comments * review comments * review comment on options --- .../Dynamic/TextTransform.cs | 12 +- .../TransformsStatic.cs | 16 +-- .../Text/TextCatalog.cs | 26 ++-- .../Text/TextFeaturizingEstimator.cs | 119 ++++++++++++------ .../PredictionEngineBench.cs | 10 +- ...sticDualCoordinateAscentClassifierBench.cs | 47 +++---- .../UnitTests/TestEntryPoints.cs | 8 +- .../Scenarios/Api/Estimators/Visibility.cs | 4 +- .../Transformers/TextFeaturizerTests.cs | 2 +- 9 files changed, 143 insertions(+), 101 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs index c1583d56d1..b597c8e47b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs @@ -31,12 +31,12 @@ public static void TextTransform() // Another pipeline, that customizes the advanced settings of the FeaturizeText transformer. string customizedColumnName = "CustomizedTextFeatures"; - var customized_pipeline = ml.Transforms.Text.FeaturizeText(customizedColumnName, "SentimentText", s => - { - s.KeepPunctuations = false; - s.KeepNumbers = false; - s.OutputTokens = true; - s.TextLanguage = TextFeaturizingEstimator.Language.English; // supports English, French, German, Dutch, Italian, Spanish, Japanese + var customized_pipeline = ml.Transforms.Text.FeaturizeText(customizedColumnName, new List { "SentimentText" }, + new TextFeaturizingEstimator.Options { + KeepPunctuations = false, + KeepNumbers = false, + OutputTokens = true, + TextLanguage = TextFeaturizingEstimator.Language.English, // supports English, French, German, Dutch, Italian, Spanish, Japanese }); // The transformed data for both pipelines. diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs index 8a609b44b4..b1ccb06b84 100644 --- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs @@ -1509,8 +1509,8 @@ internal sealed class OutPipelineColumn : Vector { public readonly Scalar[] Inputs; - public OutPipelineColumn(IEnumerable> inputs, Action advancedSettings) - : base(new Reconciler(advancedSettings), inputs.ToArray()) + public OutPipelineColumn(IEnumerable> inputs, Options options) + : base(new Reconciler(options), inputs.ToArray()) { Inputs = inputs.ToArray(); } @@ -1518,11 +1518,11 @@ public OutPipelineColumn(IEnumerable> inputs, Action ad private sealed class Reconciler : EstimatorReconciler { - private readonly Action _settings; + private readonly Options _settings; - public Reconciler(Action advancedSettings) + public Reconciler(Options options) { - _settings = advancedSettings; + _settings = options; } public override IEstimator Reconcile(IHostEnvironment env, @@ -1543,14 +1543,14 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// Input data. /// Additional data. - /// Delegate which allows you to set transformation settings. + /// Advanced transform settings. /// - public static Vector FeaturizeText(this Scalar input, Scalar[] otherInputs = null, Action advancedSettings = null) + public static Vector FeaturizeText(this Scalar input, Scalar[] otherInputs = null, TextFeaturizingEstimator.Options options = null) { Contracts.CheckValue(input, nameof(input)); Contracts.CheckValueOrNull(otherInputs); otherInputs = otherInputs ?? new Scalar[0]; - return new OutPipelineColumn(new[] { input }.Concat(otherInputs), advancedSettings); + return new OutPipelineColumn(new[] { input }.Concat(otherInputs), options); } } diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 57f56bfa64..5d5f0a0144 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -20,20 +20,11 @@ public static class TextCatalog /// The text-related transform's catalog. /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. - /// Advanced transform settings - /// - /// - /// - /// - /// public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.TextTransforms catalog, string outputColumnName, - string inputColumnName = null, - Action advancedSettings = null) + string inputColumnName = null) => new TextFeaturizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnName, advancedSettings); + outputColumnName, inputColumnName); /// /// Transform several text columns into featurized float array that represents counts of ngrams and char-grams. @@ -41,13 +32,20 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text /// The text-related transform's catalog. /// Name of the column resulting from the transformation of . /// Name of the columns to transform. If set to , the value of the will be used as source. - /// Advanced transform settings + /// Advanced options to the algorithm. + /// + /// + /// + /// + /// public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.TextTransforms catalog, string outputColumnName, IEnumerable inputColumnNames, - Action advancedSettings = null) + TextFeaturizingEstimator.Options options) => new TextFeaturizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnNames, advancedSettings); + outputColumnName, inputColumnNames, options); /// /// Tokenize incoming text in and output the tokens as . diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs index 5d4ed9ed89..985c6eb6fa 100644 --- a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs +++ b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs @@ -120,26 +120,59 @@ internal sealed class Arguments : TransformInputBase public TextNormKind VectorNormalizer = TextNormKind.L2; } - public sealed class Settings + /// + /// Advanced options for the . + /// + public sealed class Options { #pragma warning disable MSML_NoInstanceInitializers // No initializers on instance fields or properties + /// + /// Dataset language. + /// public Language TextLanguage { get; set; } = DefaultLanguage; + /// + /// Casing used for the text. + /// public CaseNormalizationMode TextCase { get; set; } = CaseNormalizationMode.Lower; + /// + /// Whether to keep diacritical marks or remove them. + /// public bool KeepDiacritics { get; set; } = false; + /// + /// Whether to keep punctuation marks or remove them. + /// public bool KeepPunctuations { get; set; } = true; + /// + /// Whether to keep numbers or remove them. + /// public bool KeepNumbers { get; set; } = true; + /// + /// Whether to output the transformed text tokens as an additional column. + /// public bool OutputTokens { get; set; } = false; + /// + /// Vector Normalizer to use. + /// public TextNormKind VectorNormalizer { get; set; } = TextNormKind.L2; + /// + /// Whether to use stop remover or not. + /// public bool UseStopRemover { get; set; } = false; + /// + /// Whether to use char extractor or not. + /// public bool UseCharExtractor { get; set; } = true; + /// + /// Whether to use word extractor or not. + /// public bool UseWordExtractor { get; set; } = true; #pragma warning restore MSML_NoInstanceInitializers // No initializers on instance fields or properties } - public readonly string OutputColumn; + internal readonly string OutputColumn; private readonly string[] _inputColumns; - public IReadOnlyCollection InputColumns => _inputColumns.AsReadOnly(); - public Settings AdvancedSettings { get; } + internal IReadOnlyCollection InputColumns => _inputColumns.AsReadOnly(); + internal Options OptionalSettings { get; } // These parameters are hardcoded for now. // REVIEW: expose them once sub-transforms are estimators. @@ -232,18 +265,18 @@ public bool NeedInitialSourceColumnConcatTransform public TransformApplierParams(TextFeaturizingEstimator parent) { var host = parent._host; - host.Check(Enum.IsDefined(typeof(Language), parent.AdvancedSettings.TextLanguage)); - host.Check(Enum.IsDefined(typeof(CaseNormalizationMode), parent.AdvancedSettings.TextCase)); + host.Check(Enum.IsDefined(typeof(Language), parent.OptionalSettings.TextLanguage)); + host.Check(Enum.IsDefined(typeof(CaseNormalizationMode), parent.OptionalSettings.TextCase)); WordExtractorFactory = parent._wordFeatureExtractor?.CreateComponent(host, parent._dictionary); CharExtractorFactory = parent._charFeatureExtractor?.CreateComponent(host, parent._dictionary); - VectorNormalizer = parent.AdvancedSettings.VectorNormalizer; - Language = parent.AdvancedSettings.TextLanguage; - UsePredefinedStopWordRemover = parent.AdvancedSettings.UseStopRemover; - TextCase = parent.AdvancedSettings.TextCase; - KeepDiacritics = parent.AdvancedSettings.KeepDiacritics; - KeepPunctuations = parent.AdvancedSettings.KeepPunctuations; - KeepNumbers = parent.AdvancedSettings.KeepNumbers; - OutputTextTokens = parent.AdvancedSettings.OutputTokens; + VectorNormalizer = parent.OptionalSettings.VectorNormalizer; + Language = parent.OptionalSettings.TextLanguage; + UsePredefinedStopWordRemover = parent.OptionalSettings.UseStopRemover; + TextCase = parent.OptionalSettings.TextCase; + KeepDiacritics = parent.OptionalSettings.KeepDiacritics; + KeepPunctuations = parent.OptionalSettings.KeepPunctuations; + KeepNumbers = parent.OptionalSettings.KeepNumbers; + OutputTextTokens = parent.OptionalSettings.OutputTokens; Dictionary = parent._dictionary; } } @@ -254,18 +287,16 @@ public TransformApplierParams(TextFeaturizingEstimator parent) internal const string UserName = "Text Transform"; internal const string LoaderSignature = "Text"; - public const Language DefaultLanguage = Language.English; + internal const Language DefaultLanguage = Language.English; private const string TransformedTextColFormat = "{0}_TransformedText"; - public TextFeaturizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, - Action advancedSettings = null) - : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName }, advancedSettings) + internal TextFeaturizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null) + : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName }) { } - public TextFeaturizingEstimator(IHostEnvironment env, string name, IEnumerable source, - Action advancedSettings = null) + internal TextFeaturizingEstimator(IHostEnvironment env, string name, IEnumerable source, Options options = null) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(TextFeaturizingEstimator)); @@ -273,21 +304,25 @@ public TextFeaturizingEstimator(IHostEnvironment env, string name, IEnumerable + /// Trains and returns a . + /// public ITransformer Fit(IDataView input) { var h = _host; @@ -463,7 +498,7 @@ public ITransformer Fit(IDataView input) return new Transformer(_host, input, view); } - public static ITransformer Create(IHostEnvironment env, ModelLoadContext ctx) + private static ITransformer Create(IHostEnvironment env, ModelLoadContext ctx) => new Transformer(env, ctx); private static string GenerateColumnName(Schema schema, string srcName, string xfTag) @@ -471,6 +506,10 @@ private static string GenerateColumnName(Schema schema, string srcName, string x return schema.GetTempColumnName(string.Format("{0}_{1}", srcName, xfTag)); } + /// + /// Returns the of the schema which will be produced by the transformer. + /// Used for schema propagation and verification in a pipeline. + /// public SchemaShape GetOutputSchema(SchemaShape inputSchema) { _host.CheckValue(inputSchema, nameof(inputSchema)); @@ -485,12 +524,12 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) var metadata = new List(2); metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, TextType.Instance, false)); - if (AdvancedSettings.VectorNormalizer != TextNormKind.None) + if (OptionalSettings.VectorNormalizer != TextNormKind.None) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.IsNormalized, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false)); result[OutputColumn] = new SchemaShape.Column(OutputColumn, SchemaShape.Column.VectorKind.Vector, NumberType.R4, false, new SchemaShape(metadata)); - if (AdvancedSettings.OutputTokens) + if (OptionalSettings.OutputTokens) { string name = string.Format(TransformedTextColFormat, OutputColumn); result[name] = new SchemaShape.Column(name, SchemaShape.Column.VectorKind.VariableVector, TextType.Instance, false); @@ -502,18 +541,18 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) // Factory method for SignatureDataTransform. internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView data) { - Action settings = s => + var settings = new Options { - s.TextLanguage = args.Language; - s.TextCase = args.TextCase; - s.KeepDiacritics = args.KeepDiacritics; - s.KeepPunctuations = args.KeepPunctuations; - s.KeepNumbers = args.KeepNumbers; - s.OutputTokens = args.OutputTokens; - s.VectorNormalizer = args.VectorNormalizer; - s.UseStopRemover = args.UsePredefinedStopWordRemover; - s.UseWordExtractor = args.WordFeatureExtractor != null; - s.UseCharExtractor = args.CharFeatureExtractor != null; + TextLanguage = args.Language, + TextCase = args.TextCase, + KeepDiacritics = args.KeepDiacritics, + KeepPunctuations = args.KeepPunctuations, + KeepNumbers = args.KeepNumbers, + OutputTokens = args.OutputTokens, + VectorNormalizer = args.VectorNormalizer, + UseStopRemover = args.UsePredefinedStopWordRemover, + UseWordExtractor = args.WordFeatureExtractor != null, + UseCharExtractor = args.CharFeatureExtractor != null, }; var estimator = new TextFeaturizingEstimator(env, args.Columns.Name, args.Columns.Source ?? new[] { args.Columns.Name }, settings); @@ -530,7 +569,7 @@ private sealed class Transformer : ITransformer, ICanSaveModel private readonly IHost _host; private readonly IDataView _xf; - public Transformer(IHostEnvironment env, IDataView input, IDataView view) + internal Transformer(IHostEnvironment env, IDataView input, IDataView view) { _host = env.Register(nameof(Transformer)); _xf = ApplyTransformUtils.ApplyAllTransformsToData(_host, view, new EmptyDataView(_host, input.Schema), input); diff --git a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs index d0d9ba7d0c..076fde30d1 100644 --- a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs +++ b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs @@ -72,8 +72,8 @@ public void SetupSentimentPipeline() string _sentimentDataPath = BaseTestClass.GetDataPath("wikipedia-detox-250-line-data.tsv"); - var env = new MLContext(seed: 1, conc: 1); - var reader = new TextLoader(env, columns: new[] + var mlContext = new MLContext(seed: 1, conc: 1); + var reader = new TextLoader(mlContext, columns: new[] { new TextLoader.Column("Label", DataKind.BL, 0), new TextLoader.Column("SentimentText", DataKind.Text, 1) @@ -83,13 +83,13 @@ public void SetupSentimentPipeline() IDataView data = reader.Read(_sentimentDataPath); - var pipeline = new TextFeaturizingEstimator(env, "Features", "SentimentText") - .Append(env.BinaryClassification.Trainers.StochasticDualCoordinateAscent( + var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") + .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent( new SdcaBinaryTrainer.Options {NumThreads = 1, ConvergenceTolerance = 1e-2f, })); var model = pipeline.Fit(data); - _sentimentModel = model.CreatePredictionEngine(env); + _sentimentModel = model.CreatePredictionEngine(mlContext); } [GlobalSetup(Target = nameof(MakeBreastCancerPredictions))] diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 613dda6609..7c67d13ac4 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -24,7 +24,7 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics private readonly string _sentimentDataPath = BaseTestClass.GetDataPath("wikipedia-detox-250-line-data.tsv"); private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination - private readonly MLContext _env = new MLContext(seed: 1); + private readonly MLContext mlContext = new MLContext(seed: 1); private readonly int[] _batchSizes = new int[] { 1, 2, 5 }; @@ -54,7 +54,7 @@ protected override IEnumerable GetMetrics() private TransformerChain> Train(string dataPath) { - var reader = new TextLoader(_env, + var reader = new TextLoader(mlContext, columns: new[] { new TextLoader.Column("Label", DataKind.R4, 0), @@ -68,8 +68,8 @@ private TransformerChain - { - args.OutputTokens = true; - args.KeepPunctuations = false; - args.UseStopRemover = true; - args.VectorNormalizer = TextFeaturizingEstimator.TextNormKind.None; - args.UseCharExtractor = false; - args.UseWordExtractor = false; - }).Fit(loader).Transform(loader); - var trans = _env.Transforms.Text.ExtractWordEmbeddings("Features", "WordEmbeddings_TransformedText", + + var loader = mlContext.Data.ReadFromTextFile(_sentimentDataPath, arguments); + var text = mlContext.Transforms.Text.FeaturizeText("WordEmbeddings", new List { "SentimentText" }, + new TextFeaturizingEstimator.Options { + OutputTokens = true, + KeepPunctuations = false, + UseStopRemover = true, + VectorNormalizer = TextFeaturizingEstimator.TextNormKind.None, + UseCharExtractor = false, + UseWordExtractor = false, + }).Fit(loader).Transform(loader); + + var trans = mlContext.Transforms.Text.ExtractWordEmbeddings("Features", "WordEmbeddings_TransformedText", WordEmbeddingsExtractingEstimator.PretrainedModelKind.Sswe).Fit(text).Transform(text); + // Train - var trainer = _env.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(); + var trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(); var predicted = trainer.Fit(trans); _consumer.Consume(predicted); } @@ -122,10 +125,10 @@ public void TrainSentiment() public void SetupPredictBenchmarks() { _trainedModel = Train(_dataPath); - _predictionEngine = _trainedModel.CreatePredictionEngine(_env); + _predictionEngine = _trainedModel.CreatePredictionEngine(mlContext); _consumer.Consume(_predictionEngine.Predict(_example)); - var reader = new TextLoader(_env, + var reader = new TextLoader(mlContext, columns: new[] { new TextLoader.Column("Label", DataKind.R4, 0), @@ -139,7 +142,7 @@ public void SetupPredictBenchmarks() IDataView testData = reader.Read(_dataPath); IDataView scoredTestData = _trainedModel.Transform(testData); - var evaluator = new MultiClassClassifierEvaluator(_env, new MultiClassClassifierEvaluator.Arguments()); + var evaluator = new MultiClassClassifierEvaluator(mlContext, new MultiClassClassifierEvaluator.Arguments()); _metrics = evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel); _batches = new IrisData[_batchSizes.Length][]; @@ -158,13 +161,13 @@ public void SetupPredictBenchmarks() public float[] PredictIris() => _predictionEngine.Predict(_example).PredictedLabels; [Benchmark] - public void PredictIrisBatchOf1() => _trainedModel.Transform(_env.Data.ReadFromEnumerable(_batches[0])); + public void PredictIrisBatchOf1() => _trainedModel.Transform(mlContext.Data.ReadFromEnumerable(_batches[0])); [Benchmark] - public void PredictIrisBatchOf2() => _trainedModel.Transform(_env.Data.ReadFromEnumerable(_batches[1])); + public void PredictIrisBatchOf2() => _trainedModel.Transform(mlContext.Data.ReadFromEnumerable(_batches[1])); [Benchmark] - public void PredictIrisBatchOf5() => _trainedModel.Transform(_env.Data.ReadFromEnumerable(_batches[2])); + public void PredictIrisBatchOf5() => _trainedModel.Transform(mlContext.Data.ReadFromEnumerable(_batches[2])); } public class IrisData diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index e40435bfda..8fcb00842e 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -998,10 +998,10 @@ public void EntryPointPipelineEnsembleText() var data = splitOutput.TrainData[i]; if (i % 2 == 0) { - data = new TextFeaturizingEstimator(Env, "Features", "Text", args => - { - args.UseStopRemover = true; - }).Fit(data).Transform(data); + data = new TextFeaturizingEstimator(Env, "Features", new List { "Text" }, + new TextFeaturizingEstimator.Options { + UseStopRemover = true, + }).Fit(data).Transform(data); } else { diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs index 4b546475fd..34df7a6023 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Collections.Generic; using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.RunTests; @@ -24,7 +25,8 @@ void Visibility() { var ml = new MLContext(seed: 1, conc: 1); var pipeline = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true) - .Append(ml.Transforms.Text.FeaturizeText("Features", "SentimentText", s => s.OutputTokens = true)); + .Append(ml.Transforms.Text.FeaturizeText("Features", new List { "SentimentText" }, + new Transforms.Text.TextFeaturizingEstimator.Options { OutputTokens = true })); var src = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)); var data = pipeline.Fit(src).Read(src); diff --git a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs index 61d2f2c94c..a224d17192 100644 --- a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs @@ -43,7 +43,7 @@ public void TextFeaturizerWorkout() .AsDynamic; var feat = data.MakeNewEstimator() - .Append(row => row.text.FeaturizeText(advancedSettings: s => { s.OutputTokens = true; })); + .Append(row => row.text.FeaturizeText(options: new TextFeaturizingEstimator.Options { OutputTokens = true, })); TestEstimatorCore(feat.AsDynamic, data.AsDynamic, invalidInput: invalidData);