From 834e4715afa070f5e5c3628693682943b3f0bd6d Mon Sep 17 00:00:00 2001
From: Abhishek Goswami <abgoswam@gmail.com>
Date: Thu, 7 Feb 2019 22:41:00 +0000
Subject: [PATCH] Creation of components through MLContext and cleanup (text
 transform) (#2394)

* text transform

* review comments

* review comments

* review comment on options
---
 .../Dynamic/TextTransform.cs                  |  12 +-
 .../TransformsStatic.cs                       |  16 +--
 .../Text/TextCatalog.cs                       |  26 ++--
 .../Text/TextFeaturizingEstimator.cs          | 119 ++++++++++++------
 .../PredictionEngineBench.cs                  |  10 +-
 ...sticDualCoordinateAscentClassifierBench.cs |  47 +++----
 .../UnitTests/TestEntryPoints.cs              |   8 +-
 .../Scenarios/Api/Estimators/Visibility.cs    |   4 +-
 .../Transformers/TextFeaturizerTests.cs       |   2 +-
 9 files changed, 143 insertions(+), 101 deletions(-)
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs
index c1583d56d1..b597c8e47b 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs
@@ -31,12 +31,12 @@ public static void TextTransform()
 
             // Another pipeline, that customizes the advanced settings of the FeaturizeText transformer.
             string customizedColumnName = "CustomizedTextFeatures";
-            var customized_pipeline = ml.Transforms.Text.FeaturizeText(customizedColumnName, "SentimentText", s =>
-            {
-                s.KeepPunctuations = false;
-                s.KeepNumbers = false;
-                s.OutputTokens = true;
-                s.TextLanguage = TextFeaturizingEstimator.Language.English; // supports  English, French, German, Dutch, Italian, Spanish, Japanese
+            var customized_pipeline = ml.Transforms.Text.FeaturizeText(customizedColumnName, new List<string> { "SentimentText" }, 
+                new TextFeaturizingEstimator.Options { 
+                    KeepPunctuations = false,
+                    KeepNumbers = false,
+                    OutputTokens = true,
+                    TextLanguage = TextFeaturizingEstimator.Language.English, // supports  English, French, German, Dutch, Italian, Spanish, Japanese
             });
 
             // The transformed data for both pipelines.
diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs
index 8a609b44b4..b1ccb06b84 100644
--- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs
+++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs
@@ -1509,8 +1509,8 @@ internal sealed class OutPipelineColumn : Vector<float>
         {
             public readonly Scalar<string>[] Inputs;
 
-            public OutPipelineColumn(IEnumerable<Scalar<string>> inputs, Action<Settings> advancedSettings)
-                : base(new Reconciler(advancedSettings), inputs.ToArray())
+            public OutPipelineColumn(IEnumerable<Scalar<string>> inputs, Options options)
+                : base(new Reconciler(options), inputs.ToArray())
             {
                 Inputs = inputs.ToArray();
             }
@@ -1518,11 +1518,11 @@ public OutPipelineColumn(IEnumerable<Scalar<string>> inputs, Action<Settings> ad
 
         private sealed class Reconciler : EstimatorReconciler
         {
-            private readonly Action<Settings> _settings;
+            private readonly Options _settings;
 
-            public Reconciler(Action<Settings> advancedSettings)
+            public Reconciler(Options options)
             {
-                _settings = advancedSettings;
+                _settings = options;
             }
 
             public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
@@ -1543,14 +1543,14 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
         /// </summary>
         /// <param name="input">Input data.</param>
         /// <param name="otherInputs">Additional data.</param>
-        /// <param name="advancedSettings">Delegate which allows you to set transformation settings.</param>
+        /// <param name="options">Advanced transform settings.</param>
         /// <returns></returns>
-        public static Vector<float> FeaturizeText(this Scalar<string> input, Scalar<string>[] otherInputs = null, Action<TextFeaturizingEstimator.Settings> advancedSettings = null)
+        public static Vector<float> FeaturizeText(this Scalar<string> input, Scalar<string>[] otherInputs = null, TextFeaturizingEstimator.Options options = null)
         {
             Contracts.CheckValue(input, nameof(input));
             Contracts.CheckValueOrNull(otherInputs);
             otherInputs = otherInputs ?? new Scalar<string>[0];
-            return new OutPipelineColumn(new[] { input }.Concat(otherInputs), advancedSettings);
+            return new OutPipelineColumn(new[] { input }.Concat(otherInputs), options);
         }
     }
 
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 57f56bfa64..5d5f0a0144 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -20,20 +20,11 @@ public static class TextCatalog
         /// <param name="catalog">The text-related transform's catalog.</param>
         /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
         /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
-        /// <param name="advancedSettings">Advanced transform settings</param>
-        /// <example>
-        /// <format type="text/markdown">
-        /// <![CDATA[
-        /// [!code-csharp[FeaturizeText](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs)]
-        /// ]]>
-        /// </format>
-        /// </example>
         public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.TextTransforms catalog,
             string outputColumnName,
-            string inputColumnName = null,
-            Action<TextFeaturizingEstimator.Settings> advancedSettings = null)
+            string inputColumnName = null)
             => new TextFeaturizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(),
-                outputColumnName, inputColumnName, advancedSettings);
+                outputColumnName, inputColumnName);
 
         /// <summary>
         /// Transform several text columns into featurized float array that represents counts of ngrams and char-grams.
@@ -41,13 +32,20 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text
         /// <param name="catalog">The text-related transform's catalog.</param>
         /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnNames"/>.</param>
         /// <param name="inputColumnNames">Name of the columns to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
-        /// <param name="advancedSettings">Advanced transform settings</param>
+        /// <param name="options">Advanced options for the text featurizing transform.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[FeaturizeText](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.TextTransforms catalog,
             string outputColumnName,
             IEnumerable<string> inputColumnNames,
-            Action<TextFeaturizingEstimator.Settings> advancedSettings = null)
+            TextFeaturizingEstimator.Options options)
             => new TextFeaturizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(),
-                outputColumnName, inputColumnNames, advancedSettings);
+                outputColumnName, inputColumnNames, options);
 
         /// <summary>
         /// Tokenize incoming text in <paramref name="inputColumnName"/> and output the tokens as <paramref name="outputColumnName"/>.
diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs
index 5d4ed9ed89..985c6eb6fa 100644
--- a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs
@@ -120,26 +120,59 @@ internal sealed class Arguments : TransformInputBase
             public TextNormKind VectorNormalizer = TextNormKind.L2;
         }
 
-        public sealed class Settings
+        /// <summary>
+        /// Advanced options for the <see cref="TextFeaturizingEstimator"/>.
+        /// </summary>
+        public sealed class Options
         {
 #pragma warning disable MSML_NoInstanceInitializers // No initializers on instance fields or properties
+            /// <summary>
+            /// Dataset language.
+            /// </summary>
             public Language TextLanguage { get; set; } = DefaultLanguage;
+            /// <summary>
+            /// Casing used for the text.
+            /// </summary>
             public CaseNormalizationMode TextCase { get; set; } = CaseNormalizationMode.Lower;
+            /// <summary>
+            /// Whether to keep diacritical marks or remove them.
+            /// </summary>
             public bool KeepDiacritics { get; set; } = false;
+            /// <summary>
+            /// Whether to keep punctuation marks or remove them.
+            /// </summary>
             public bool KeepPunctuations { get; set; } = true;
+            /// <summary>
+            /// Whether to keep numbers or remove them.
+            /// </summary>
             public bool KeepNumbers { get; set; } = true;
+            /// <summary>
+            /// Whether to output the transformed text tokens as an additional column.
+            /// </summary>
             public bool OutputTokens { get; set; } = false;
+            /// <summary>
+            /// Type of vector normalization to use.
+            /// </summary>
             public TextNormKind VectorNormalizer { get; set; } = TextNormKind.L2;
+            /// <summary>
+            /// Whether to use the predefined stop word remover or not.
+            /// </summary>
             public bool UseStopRemover { get; set; } = false;
+            /// <summary>
+            /// Whether to extract character n-gram features or not.
+            /// </summary>
             public bool UseCharExtractor { get; set; } = true;
+            /// <summary>
+            /// Whether to extract word n-gram features or not.
+            /// </summary>
             public bool UseWordExtractor { get; set; } = true;
 #pragma warning restore MSML_NoInstanceInitializers // No initializers on instance fields or properties
         }
 
-        public readonly string OutputColumn;
+        internal readonly string OutputColumn;
         private readonly string[] _inputColumns;
-        public IReadOnlyCollection<string> InputColumns => _inputColumns.AsReadOnly();
-        public Settings AdvancedSettings { get; }
+        internal IReadOnlyCollection<string> InputColumns => _inputColumns.AsReadOnly();
+        internal Options OptionalSettings { get; }
 
         // These parameters are hardcoded for now.
         // REVIEW: expose them once sub-transforms are estimators.
@@ -232,18 +265,18 @@ public bool NeedInitialSourceColumnConcatTransform
             public TransformApplierParams(TextFeaturizingEstimator parent)
             {
                 var host = parent._host;
-                host.Check(Enum.IsDefined(typeof(Language), parent.AdvancedSettings.TextLanguage));
-                host.Check(Enum.IsDefined(typeof(CaseNormalizationMode), parent.AdvancedSettings.TextCase));
+                host.Check(Enum.IsDefined(typeof(Language), parent.OptionalSettings.TextLanguage));
+                host.Check(Enum.IsDefined(typeof(CaseNormalizationMode), parent.OptionalSettings.TextCase));
                 WordExtractorFactory = parent._wordFeatureExtractor?.CreateComponent(host, parent._dictionary);
                 CharExtractorFactory = parent._charFeatureExtractor?.CreateComponent(host, parent._dictionary);
-                VectorNormalizer = parent.AdvancedSettings.VectorNormalizer;
-                Language = parent.AdvancedSettings.TextLanguage;
-                UsePredefinedStopWordRemover = parent.AdvancedSettings.UseStopRemover;
-                TextCase = parent.AdvancedSettings.TextCase;
-                KeepDiacritics = parent.AdvancedSettings.KeepDiacritics;
-                KeepPunctuations = parent.AdvancedSettings.KeepPunctuations;
-                KeepNumbers = parent.AdvancedSettings.KeepNumbers;
-                OutputTextTokens = parent.AdvancedSettings.OutputTokens;
+                VectorNormalizer = parent.OptionalSettings.VectorNormalizer;
+                Language = parent.OptionalSettings.TextLanguage;
+                UsePredefinedStopWordRemover = parent.OptionalSettings.UseStopRemover;
+                TextCase = parent.OptionalSettings.TextCase;
+                KeepDiacritics = parent.OptionalSettings.KeepDiacritics;
+                KeepPunctuations = parent.OptionalSettings.KeepPunctuations;
+                KeepNumbers = parent.OptionalSettings.KeepNumbers;
+                OutputTextTokens = parent.OptionalSettings.OutputTokens;
                 Dictionary = parent._dictionary;
             }
         }
@@ -254,18 +287,16 @@ public TransformApplierParams(TextFeaturizingEstimator parent)
         internal const string UserName = "Text Transform";
         internal const string LoaderSignature = "Text";
 
-        public const Language DefaultLanguage = Language.English;
+        internal const Language DefaultLanguage = Language.English;
 
         private const string TransformedTextColFormat = "{0}_TransformedText";
 
-        public TextFeaturizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null,
-            Action<Settings> advancedSettings = null)
-            : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName }, advancedSettings)
+        internal TextFeaturizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null)
+            : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName })
         {
         }
 
-        public TextFeaturizingEstimator(IHostEnvironment env, string name, IEnumerable<string> source,
-            Action<Settings> advancedSettings = null)
+        internal TextFeaturizingEstimator(IHostEnvironment env, string name, IEnumerable<string> source, Options options = null)
         {
             Contracts.CheckValue(env, nameof(env));
             _host = env.Register(nameof(TextFeaturizingEstimator));
@@ -273,21 +304,25 @@ public TextFeaturizingEstimator(IHostEnvironment env, string name, IEnumerable<s
             _host.CheckParam(source.Any(), nameof(source));
             _host.CheckParam(!source.Any(string.IsNullOrWhiteSpace), nameof(source));
             _host.CheckNonEmpty(name, nameof(name));
-            _host.CheckValueOrNull(advancedSettings);
+            _host.CheckValueOrNull(options);
 
             _inputColumns = source.ToArray();
             OutputColumn = name;
 
-            AdvancedSettings = new Settings();
-            advancedSettings?.Invoke(AdvancedSettings);
+            OptionalSettings = new Options();
+            if (options != null)
+                OptionalSettings = options;
 
             _dictionary = null;
-            if (AdvancedSettings.UseWordExtractor)
+            if (OptionalSettings.UseWordExtractor)
                 _wordFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments();
-            if (AdvancedSettings.UseCharExtractor)
+            if (OptionalSettings.UseCharExtractor)
                 _charFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments() { NgramLength = 3, AllLengths = false };
         }
 
+        /// <summary>
+        /// Trains and returns a <see cref="ITransformer"/>.
+        /// </summary>
         public ITransformer Fit(IDataView input)
         {
             var h = _host;
@@ -463,7 +498,7 @@ public ITransformer Fit(IDataView input)
             return new Transformer(_host, input, view);
         }
 
-        public static ITransformer Create(IHostEnvironment env, ModelLoadContext ctx)
+        private static ITransformer Create(IHostEnvironment env, ModelLoadContext ctx)
             => new Transformer(env, ctx);
 
         private static string GenerateColumnName(Schema schema, string srcName, string xfTag)
@@ -471,6 +506,10 @@ private static string GenerateColumnName(Schema schema, string srcName, string x
             return schema.GetTempColumnName(string.Format("{0}_{1}", srcName, xfTag));
         }
 
+        /// <summary>
+        /// Returns the <see cref="SchemaShape"/> of the schema which will be produced by the transformer.
+        /// Used for schema propagation and verification in a pipeline.
+        /// </summary>
         public SchemaShape GetOutputSchema(SchemaShape inputSchema)
         {
             _host.CheckValue(inputSchema, nameof(inputSchema));
@@ -485,12 +524,12 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
 
             var metadata = new List<SchemaShape.Column>(2);
             metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, TextType.Instance, false));
-            if (AdvancedSettings.VectorNormalizer != TextNormKind.None)
+            if (OptionalSettings.VectorNormalizer != TextNormKind.None)
                 metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.IsNormalized, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false));
 
             result[OutputColumn] = new SchemaShape.Column(OutputColumn, SchemaShape.Column.VectorKind.Vector, NumberType.R4, false,
                 new SchemaShape(metadata));
-            if (AdvancedSettings.OutputTokens)
+            if (OptionalSettings.OutputTokens)
             {
                 string name = string.Format(TransformedTextColFormat, OutputColumn);
                 result[name] = new SchemaShape.Column(name, SchemaShape.Column.VectorKind.VariableVector, TextType.Instance, false);
@@ -502,18 +541,18 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
         // Factory method for SignatureDataTransform.
         internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView data)
         {
-            Action<Settings> settings = s =>
+            var settings = new Options
             {
-                s.TextLanguage = args.Language;
-                s.TextCase = args.TextCase;
-                s.KeepDiacritics = args.KeepDiacritics;
-                s.KeepPunctuations = args.KeepPunctuations;
-                s.KeepNumbers = args.KeepNumbers;
-                s.OutputTokens = args.OutputTokens;
-                s.VectorNormalizer = args.VectorNormalizer;
-                s.UseStopRemover = args.UsePredefinedStopWordRemover;
-                s.UseWordExtractor = args.WordFeatureExtractor != null;
-                s.UseCharExtractor = args.CharFeatureExtractor != null;
+                TextLanguage = args.Language,
+                TextCase = args.TextCase,
+                KeepDiacritics = args.KeepDiacritics,
+                KeepPunctuations = args.KeepPunctuations,
+                KeepNumbers = args.KeepNumbers,
+                OutputTokens = args.OutputTokens,
+                VectorNormalizer = args.VectorNormalizer,
+                UseStopRemover = args.UsePredefinedStopWordRemover,
+                UseWordExtractor = args.WordFeatureExtractor != null,
+                UseCharExtractor = args.CharFeatureExtractor != null,
             };
 
             var estimator = new TextFeaturizingEstimator(env, args.Columns.Name, args.Columns.Source ?? new[] { args.Columns.Name }, settings);
@@ -530,7 +569,7 @@ private sealed class Transformer : ITransformer, ICanSaveModel
             private readonly IHost _host;
             private readonly IDataView _xf;
 
-            public Transformer(IHostEnvironment env, IDataView input, IDataView view)
+            internal Transformer(IHostEnvironment env, IDataView input, IDataView view)
             {
                 _host = env.Register(nameof(Transformer));
                 _xf = ApplyTransformUtils.ApplyAllTransformsToData(_host, view, new EmptyDataView(_host, input.Schema), input);
diff --git a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs
index d0d9ba7d0c..076fde30d1 100644
--- a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs
+++ b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs
@@ -72,8 +72,8 @@ public void SetupSentimentPipeline()
 
             string _sentimentDataPath = BaseTestClass.GetDataPath("wikipedia-detox-250-line-data.tsv");
 
-            var env = new MLContext(seed: 1, conc: 1);
-            var reader = new TextLoader(env, columns: new[]
+            var mlContext = new MLContext(seed: 1, conc: 1);
+            var reader = new TextLoader(mlContext, columns: new[]
                         {
                             new TextLoader.Column("Label", DataKind.BL, 0),
                             new TextLoader.Column("SentimentText", DataKind.Text, 1)
@@ -83,13 +83,13 @@ public void SetupSentimentPipeline()
 
             IDataView data = reader.Read(_sentimentDataPath);
 
-            var pipeline = new TextFeaturizingEstimator(env, "Features", "SentimentText")
-                .Append(env.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
+            var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+                .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
                     new SdcaBinaryTrainer.Options {NumThreads = 1, ConvergenceTolerance = 1e-2f, }));
 
             var model = pipeline.Fit(data);
 
-            _sentimentModel = model.CreatePredictionEngine<SentimentData, SentimentPrediction>(env);
+            _sentimentModel = model.CreatePredictionEngine<SentimentData, SentimentPrediction>(mlContext);
         }
 
         [GlobalSetup(Target = nameof(MakeBreastCancerPredictions))]
diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
index 613dda6609..7c67d13ac4 100644
--- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
+++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
@@ -24,7 +24,7 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics
         private readonly string _sentimentDataPath = BaseTestClass.GetDataPath("wikipedia-detox-250-line-data.tsv");
         private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination
 
-        private readonly MLContext _env = new MLContext(seed: 1);
+        private readonly MLContext mlContext = new MLContext(seed: 1);
 
         private readonly int[] _batchSizes = new int[] { 1, 2, 5 };
 
@@ -54,7 +54,7 @@ protected override IEnumerable<Metric> GetMetrics()
 
         private TransformerChain<MulticlassPredictionTransformer<MulticlassLogisticRegressionModelParameters>> Train(string dataPath)
         {
-            var reader = new TextLoader(_env,
+            var reader = new TextLoader(mlContext,
                     columns: new[]
                     {
                             new TextLoader.Column("Label", DataKind.R4, 0),
@@ -68,8 +68,8 @@ private TransformerChain<MulticlassPredictionTransformer<MulticlassLogisticRegre
 
             IDataView data = reader.Read(dataPath);
 
-            var pipeline = new ColumnConcatenatingEstimator(_env, "Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" })
-                .Append(_env.MulticlassClassification.Trainers.StochasticDualCoordinateAscent());
+            var pipeline = new ColumnConcatenatingEstimator(mlContext, "Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" })
+                .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent());
 
             return pipeline.Fit(data);
         }
@@ -100,20 +100,23 @@ public void TrainSentiment()
                 AllowQuoting = false,
                 AllowSparse = false
             };
-            var loader = _env.Data.ReadFromTextFile(_sentimentDataPath, arguments);
-            var text = new TextFeaturizingEstimator(_env, "WordEmbeddings", "SentimentText", args =>
-            {
-                args.OutputTokens = true;
-                args.KeepPunctuations = false;
-                args.UseStopRemover = true;
-                args.VectorNormalizer = TextFeaturizingEstimator.TextNormKind.None;
-                args.UseCharExtractor = false;
-                args.UseWordExtractor = false;
-            }).Fit(loader).Transform(loader);
-            var trans = _env.Transforms.Text.ExtractWordEmbeddings("Features", "WordEmbeddings_TransformedText",
+
+            var loader = mlContext.Data.ReadFromTextFile(_sentimentDataPath, arguments);
+            var text = mlContext.Transforms.Text.FeaturizeText("WordEmbeddings", new List<string> { "SentimentText" }, 
+                new TextFeaturizingEstimator.Options { 
+                    OutputTokens = true,
+                    KeepPunctuations = false,
+                    UseStopRemover = true,
+                    VectorNormalizer = TextFeaturizingEstimator.TextNormKind.None,
+                    UseCharExtractor = false,
+                    UseWordExtractor = false,
+                }).Fit(loader).Transform(loader);
+
+            var trans = mlContext.Transforms.Text.ExtractWordEmbeddings("Features", "WordEmbeddings_TransformedText", 
                 WordEmbeddingsExtractingEstimator.PretrainedModelKind.Sswe).Fit(text).Transform(text);
+
             // Train
-            var trainer = _env.MulticlassClassification.Trainers.StochasticDualCoordinateAscent();
+            var trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent();
             var predicted = trainer.Fit(trans);
             _consumer.Consume(predicted);
         }
@@ -122,10 +125,10 @@ public void TrainSentiment()
         public void SetupPredictBenchmarks()
         {
             _trainedModel = Train(_dataPath);
-            _predictionEngine = _trainedModel.CreatePredictionEngine<IrisData, IrisPrediction>(_env);
+            _predictionEngine = _trainedModel.CreatePredictionEngine<IrisData, IrisPrediction>(mlContext);
             _consumer.Consume(_predictionEngine.Predict(_example));
 
-            var reader = new TextLoader(_env,
+            var reader = new TextLoader(mlContext,
                     columns: new[]
                     {
                             new TextLoader.Column("Label", DataKind.R4, 0),
@@ -139,7 +142,7 @@ public void SetupPredictBenchmarks()
 
             IDataView testData = reader.Read(_dataPath);
             IDataView scoredTestData = _trainedModel.Transform(testData);
-            var evaluator = new MultiClassClassifierEvaluator(_env, new MultiClassClassifierEvaluator.Arguments());
+            var evaluator = new MultiClassClassifierEvaluator(mlContext, new MultiClassClassifierEvaluator.Arguments());
             _metrics = evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel);
 
             _batches = new IrisData[_batchSizes.Length][];
@@ -158,13 +161,13 @@ public void SetupPredictBenchmarks()
         public float[] PredictIris() => _predictionEngine.Predict(_example).PredictedLabels;
 
         [Benchmark]
-        public void PredictIrisBatchOf1() => _trainedModel.Transform(_env.Data.ReadFromEnumerable(_batches[0]));
+        public void PredictIrisBatchOf1() => _trainedModel.Transform(mlContext.Data.ReadFromEnumerable(_batches[0]));
 
         [Benchmark]
-        public void PredictIrisBatchOf2() => _trainedModel.Transform(_env.Data.ReadFromEnumerable(_batches[1]));
+        public void PredictIrisBatchOf2() => _trainedModel.Transform(mlContext.Data.ReadFromEnumerable(_batches[1]));
 
         [Benchmark]
-        public void PredictIrisBatchOf5() => _trainedModel.Transform(_env.Data.ReadFromEnumerable(_batches[2]));
+        public void PredictIrisBatchOf5() => _trainedModel.Transform(mlContext.Data.ReadFromEnumerable(_batches[2]));
     }
 
     public class IrisData
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
index e40435bfda..8fcb00842e 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
@@ -998,10 +998,10 @@ public void EntryPointPipelineEnsembleText()
                 var data = splitOutput.TrainData[i];
                 if (i % 2 == 0)
                 {
-                    data = new TextFeaturizingEstimator(Env, "Features", "Text", args =>
-                    {
-                        args.UseStopRemover = true;
-                    }).Fit(data).Transform(data);
+                    data = new TextFeaturizingEstimator(Env, "Features", new List<string> { "Text" }, 
+                        new TextFeaturizingEstimator.Options { 
+                            UseStopRemover = true,
+                        }).Fit(data).Transform(data);
                 }
                 else
                 {
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs
index 4b546475fd..34df7a6023 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Collections.Generic;
 using System.Linq;
 using Microsoft.ML.Data;
 using Microsoft.ML.RunTests;
@@ -24,7 +25,8 @@ void Visibility()
         {
             var ml = new MLContext(seed: 1, conc: 1);
             var pipeline = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
-                .Append(ml.Transforms.Text.FeaturizeText("Features", "SentimentText", s => s.OutputTokens = true));
+                .Append(ml.Transforms.Text.FeaturizeText("Features", new List<string> { "SentimentText" }, 
+                                                        new Transforms.Text.TextFeaturizingEstimator.Options { OutputTokens = true }));
 
             var src = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
             var data = pipeline.Fit(src).Read(src);
diff --git a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs
index 61d2f2c94c..a224d17192 100644
--- a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs
@@ -43,7 +43,7 @@ public void TextFeaturizerWorkout()
                 .AsDynamic;
 
             var feat = data.MakeNewEstimator()
-                 .Append(row => row.text.FeaturizeText(advancedSettings: s => { s.OutputTokens = true; }));
+                 .Append(row => row.text.FeaturizeText(options: new TextFeaturizingEstimator.Options { OutputTokens = true, }));
 
             TestEstimatorCore(feat.AsDynamic, data.AsDynamic, invalidInput: invalidData);