
Commit

removed ReadFile, only kept MlContext ReadFromTextFile
artidoro committed Dec 6, 2018
1 parent 490e03f commit 9d182f3
Showing 33 changed files with 70 additions and 85 deletions.
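In practice, call sites stop using the static TextLoader.ReadFile helpers and go through the MLContext data catalog instead. A rough before/after sketch, based only on the signatures visible in this diff; env, dataPath, and the single Label column are placeholders:

// Assumes the usings these files already have (Microsoft.ML, Microsoft.ML.Runtime.Data at this point in the repo's history).
// Before this commit: static helper on TextLoader (removed below).
IDataView before = TextLoader.ReadFile(env, new MultiFileSource(dataPath),
    columns: new[] { new TextLoader.Column("Label", DataKind.BL, 0) });

// After this commit: go through the MLContext data operations catalog.
var mlContext = new MLContext();
IDataView after = mlContext.Data.ReadFromTextFile(dataPath,
    columns: new[] { new TextLoader.Column("Label", DataKind.BL, 0) });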
@@ -19,7 +19,7 @@ public static void FeatureContributionCalculationTransform_Regression()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
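In this sample and the similar ones below, only the factory method name changes (TextReader becomes CreateTextReader); defining and using the reader is unchanged. A minimal sketch using the first column shown above; the data path and hasHeader value are placeholders:

var mlContext = new MLContext();
var reader = mlContext.Data.CreateTextReader(
    columns: new[]
    {
        new TextLoader.Column("MedianHomeValue", DataKind.R4, 0)
        // ...remaining columns as in the sample...
    },
    hasHeader: true);
// The reader is then pointed at the sample's data file, exactly as before the rename.
IDataView data = reader.Read(dataFile);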
@@ -31,7 +31,7 @@ public static void FeatureSelectionTransform()

// First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from
// all the feature columns into entries of a vector of a single column named "Features".
var reader = ml.Data.TextReader(
var reader = ml.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
@@ -19,7 +19,7 @@ public static void RunExample()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
@@ -22,7 +22,7 @@ public static void PFI_Regression()
// First, we define the reader: specify the data columns and where to find them in the text file.
// The data file is composed of rows of data, with each row having 11 numerical columns
// separated by whitespace.
var reader = mlContext.Data.TextReader(
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
// Read the first column (indexed by 0) in the data file as an R4 (float)
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs
@@ -24,7 +24,7 @@ public static void SDCA_BinaryClassification()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("Sentiment", DataKind.BL, 0),
20 changes: 0 additions & 20 deletions src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
@@ -1316,26 +1316,6 @@ internal static IDataLoader Create(IHostEnvironment env, ModelLoadContext ctx, I
internal static IDataLoader Create(IHostEnvironment env, Arguments args, IMultiStreamSource files)
=> (IDataLoader)new TextLoader(env, args, files).Read(files);

/// <summary>
/// Creates a <see cref="TextLoader"/> and uses it to read a specified file.
/// </summary>
/// <param name="env">The environment to use.</param>
/// <param name="columns">Defines a mapping between input columns in the file and IDataView columns.</param>
/// <param name="hasHeader">Whether the file has a header.</param>
/// <param name="separatorChar"> The character used as separator between data points in a row. By default the tab character is used as separator.</param>
/// <param name="fileSource">Specifies a file from which to read.</param>
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Column[] columns, bool hasHeader = false, char separatorChar = '\t')
=> new TextLoader(env, columns, hasHeader, separatorChar, fileSource).Read(fileSource);

/// <summary>
/// Loads a text file into an <see cref="IDataView"/>. Supports basic mapping from input columns to IDataView columns.
/// </summary>
/// <param name="env">The environment to use.</param>
/// <param name="fileSource">Specifies a file from which to read.</param>
/// <param name="args">Defines the settings of the load operation.</param>
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Arguments args = null)
=> new TextLoader(env, args, fileSource).Read(fileSource);

public void Save(ModelSaveContext ctx)
{
_host.CheckValue(ctx, nameof(ctx));
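With the static helpers gone, internal call sites construct a TextLoader directly and call Read on it, as the later hunks in this commit show. Roughly, reusing the Term column and fileSource from the ValueToKeyMapping hunk below:

var loader = new TextLoader(env,
    columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) },
    dataSample: fileSource);
IDataView termData = loader.Read(fileSource);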
@@ -21,7 +21,7 @@ public static class TextLoaderSaverCatalog
/// <param name="hasHeader">Whether the file has a header.</param>
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
/// <param name="dataSample">The optional location of a data sample.</param>
public static TextLoader TextReader(this DataOperations catalog,
public static TextLoader CreateTextReader(this DataOperations catalog,
Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null)
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChar, dataSample);

@@ -31,7 +31,7 @@ public static TextLoader TextReader(this DataOperations catalog,
/// <param name="catalog">The catalog.</param>
/// <param name="args">Defines the settings of the load operation.</param>
/// <param name="dataSample">Allows to expose items that can be used for reading.</param>
public static TextLoader TextReader(this DataOperations catalog, Arguments args, IMultiStreamSource dataSample = null)
public static TextLoader CreateTextReader(this DataOperations catalog, Arguments args, IMultiStreamSource dataSample = null)
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), args, dataSample);

/// <summary>
@@ -62,7 +62,7 @@ public static IDataView ReadFromTextFile(this DataOperations catalog,
/// <param name="catalog">The catalog.</param>
/// <param name="path">Specifies a file from which to read.</param>
/// <param name="args">Defines the settings of the load operation.</param>
public static IDataView ReadFromTextFile(this DataOperations catalog, string path, Arguments args)
public static IDataView ReadFromTextFile(this DataOperations catalog, string path, Arguments args = null)
{
Contracts.CheckNonEmpty(path, nameof(path));

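With args now optional, callers that want the loader's default Arguments (and its auto-schema parsing, which several updated tests below rely on) can pass just the path. A sketch; mlContext and path are placeholders, and the Arguments fields shown are only the ones that appear elsewhere in this commit:

// Default Arguments: the schema is parsed automatically.
IDataView data = mlContext.Data.ReadFromTextFile(path);

// Explicit settings are passed the same way as before.
var args = new TextLoader.Arguments { AllowQuoting = false, AllowSparse = false };
IDataView data2 = mlContext.Data.ReadFromTextFile(path, args);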
@@ -483,9 +483,10 @@ private static TermMap CreateFileTermMap(IHostEnvironment env, IChannel ch, stri
"{0} should not be specified when default loader is TextLoader. Ignoring {0}={1}",
nameof(Arguments.TermsColumn), src);
}
termData = TextLoader.ReadFile(env, fileSource,
columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) }
);
termData = new TextLoader(env,
columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) },
dataSample: fileSource)
.Read(fileSource);
src = "Term";
autoConvert = true;
}
3 changes: 2 additions & 1 deletion src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs
@@ -283,7 +283,8 @@ public static IEnumerable<KeyValuePair<ColumnRole, string>> LoadRoleMappingsOrNu
{
// REVIEW: Should really validate the schema here, and consider
// ignoring this stream if it isn't as expected.
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));
var loader = new TextLoader(env, dataSample: new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile))
.Read(new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));

using (var cursor = loader.GetRowCursor(c => true))
{
7 changes: 5 additions & 2 deletions src/Microsoft.ML.Transforms/TermLookupTransformer.cs
@@ -361,11 +361,14 @@ private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFacto
ulong max = ulong.MinValue;
try
{
var data = TextLoader.ReadFile(host, new MultiFileSource(filename), new[]
var data = new TextLoader(host, new[]
{
new TextLoader.Column("Term", DataKind.TX, 0),
new TextLoader.Column("Value", DataKind.TX, 1)
});
},
dataSample: new MultiFileSource(filename)
).Read(new MultiFileSource(filename));

using (var cursor = data.GetRowCursor(c => true))
{
var getTerm = cursor.GetGetter<ReadOnlyMemory<char>>(0);
@@ -86,7 +86,7 @@ public void TrainSentiment()
AllowQuoting = false,
AllowSparse = false
};
var loader = TextLoader.ReadFile(env, new MultiFileSource(_sentimentDataPath), arguments);
var loader = env.Data.ReadFromTextFile(_sentimentDataPath, arguments);

var text = TextFeaturizingEstimator.Create(env,
new TextFeaturizingEstimator.Arguments()
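The benchmark keeps its explicit TextLoader.Arguments and simply routes them through the catalog extension instead of the removed static helper. A sketch of that shape; only AllowQuoting and AllowSparse are visible in this hunk, and the remaining fields are assumptions shown for illustration:

var arguments = new TextLoader.Arguments
{
    // HasHeader and Column are assumed fields of TextLoader.Arguments, included only to show the shape.
    HasHeader = false,
    AllowQuoting = false,
    AllowSparse = false,
    Column = new[] { new TextLoader.Column("Label", DataKind.BL, 0) }
};
IDataView view = env.Data.ReadFromTextFile(_sentimentDataPath, arguments); // env is the benchmark's MLContext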
22 changes: 11 additions & 11 deletions test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
@@ -606,12 +606,12 @@ public void RankingLightGBMTest()
public void TestTreeEnsembleCombiner()
{
var dataPath = GetDataPath("breast-cancer.txt");
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
var dataView = ML.Data.ReadFromTextFile(dataPath);

var fastTrees = new IPredictorModel[3];
for (int i = 0; i < 3; i++)
{
fastTrees[i] = FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
{
FeatureColumn = "Features",
NumTrees = 5,
@@ -628,13 +628,13 @@ public void TestTreeEnsembleCombiner()
public void TestTreeEnsembleCombinerWithCategoricalSplits()
{
var dataPath = GetDataPath("adult.tiny.with-schema.txt");
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
var dataView = ML.Data.ReadFromTextFile(dataPath);

var cat = new OneHotEncodingEstimator(Env, "Categories", "Features").Fit(dataView).Transform(dataView);
var cat = new OneHotEncodingEstimator(ML, "Categories", "Features").Fit(dataView).Transform(dataView);
var fastTrees = new IPredictorModel[3];
for (int i = 0; i < 3; i++)
{
fastTrees[i] = FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
{
FeatureColumn = "Features",
NumTrees = 5,
@@ -729,35 +729,35 @@ private void CombineAndTestTreeEnsembles(IDataView idv, IPredictorModel[] fastTr
public void TestEnsembleCombiner()
{
var dataPath = GetDataPath("breast-cancer.txt");
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
var dataView = ML.Data.ReadFromTextFile(dataPath);

var predictors = new IPredictorModel[]
{
FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
{
FeatureColumn = "Features",
NumTrees = 5,
NumLeaves = 4,
LabelColumn = DefaultColumnNames.Label,
TrainingData = dataView
}).PredictorModel,
AveragedPerceptronTrainer.TrainBinary(Env, new AveragedPerceptronTrainer.Arguments()
AveragedPerceptronTrainer.TrainBinary(ML, new AveragedPerceptronTrainer.Arguments()
{
FeatureColumn = "Features",
LabelColumn = DefaultColumnNames.Label,
NumIterations = 2,
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel,
LogisticRegression.TrainBinary(Env, new LogisticRegression.Arguments()
LogisticRegression.TrainBinary(ML, new LogisticRegression.Arguments()
{
FeatureColumn = "Features",
LabelColumn = DefaultColumnNames.Label,
OptTol = 10e-4F,
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel,
LogisticRegression.TrainBinary(Env, new LogisticRegression.Arguments()
LogisticRegression.TrainBinary(ML, new LogisticRegression.Arguments()
{
FeatureColumn = "Features",
LabelColumn = DefaultColumnNames.Label,
@@ -775,7 +775,7 @@ public void TestEnsembleCombiner()
public void TestMultiClassEnsembleCombiner()
{
var dataPath = GetDataPath("breast-cancer.txt");
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
var dataView = ML.Data.ReadFromTextFile(dataPath);

var predictors = new IPredictorModel[]
{
@@ -439,7 +439,7 @@ protected bool SaveLoadText(IDataView view, IHostEnvironment env,

// Note that we don't pass in "args", but pass in a default args so we test
// the auto-schema parsing.
var loadedData = TextLoader.ReadFile(env, new MultiFileSource(pathData));
var loadedData = ML.Data.ReadFromTextFile(pathData);
if (!CheckMetadataTypes(loadedData.Schema))
Failed();

4 changes: 2 additions & 2 deletions test/Microsoft.ML.TestFramework/ModelHelper.cs
@@ -14,7 +14,7 @@ namespace Microsoft.ML.TestFramework
#pragma warning disable 612, 618
public static class ModelHelper
{
private static IHostEnvironment s_environment = new MLContext(seed: 1);
private static MLContext s_environment = new MLContext(seed: 1);
private static ITransformModel s_housePriceModel;

public static void WriteKcHousePriceModel(string dataPath, string outputModelPath)
@@ -41,7 +41,7 @@ public static void WriteKcHousePriceModel(string dataPath, Stream stream)

public static IDataView GetKcHouseDataView(string dataPath)
{
return Runtime.Data.TextLoader.ReadFile(s_environment, new MultiFileSource(dataPath),
return s_environment.Data.ReadFromTextFile(dataPath,
columns: new[]
{
new Runtime.Data.TextLoader.Column("Id", Runtime.Data.DataKind.TX, 0),
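The helper's field changes from IHostEnvironment to MLContext because ReadFromTextFile hangs off the data operations catalog exposed as MLContext.Data, which a bare IHostEnvironment does not provide. A sketch of the updated helper, with the column list truncated as in the hunk:

private static MLContext s_environment = new MLContext(seed: 1);

public static IDataView GetKcHouseDataView(string dataPath)
{
    return s_environment.Data.ReadFromTextFile(dataPath,
        columns: new[]
        {
            new Runtime.Data.TextLoader.Column("Id", Runtime.Data.DataKind.TX, 0)
            // ...remaining columns unchanged from the original helper...
        });
}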
@@ -41,7 +41,7 @@ private void IntermediateData(string dataPath)
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// A boolean column depicting the 'label'.
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
// Three text columns.
@@ -91,7 +91,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 11 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),

@@ -152,7 +152,7 @@ private ITransformer TrainOnIris(string irisDataPath)

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
new TextLoader.Column("PetalLength", DataKind.R4, 2),
@@ -217,7 +217,7 @@ private void NormalizationWorkout(string dataPath)
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// The four features of the Iris dataset will be grouped together as one Features column.
new TextLoader.Column("Features", DataKind.R4, 0, 3),
// Label: kind of iris.
@@ -278,7 +278,7 @@ private void TextFeaturizationOn(string dataPath)
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[]
var reader = mlContext.Data.CreateTextReader(new[]
{
new TextLoader.Column("IsToxic", DataKind.BL, 0),
new TextLoader.Column("Message", DataKind.TX, 1),
@@ -345,7 +345,7 @@ private void CategoricalFeaturizationOn(params string[] dataPath)
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[]
var reader = mlContext.Data.CreateTextReader(new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
// We will load all the categorical features into one vector column of size 8.
@@ -406,7 +406,7 @@ private void CrossValidationOn(string dataPath)

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[]
var reader = mlContext.Data.CreateTextReader(new[]
{
// We read the first 11 values as a single float vector.
new TextLoader.Column("SepalLength", DataKind.R4, 0),
@@ -463,7 +463,7 @@ private void ReadDataDynamic(string dataPath)
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 10 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, new[] {new TextLoader.Range(0, 9)}),
// Separately, read the target variable.
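Two of the cookbook readers above load a contiguous block of file columns into a single vector-valued column; both spellings appear in the hunks. A sketch of each, with the trailing Target column name and index assumed for illustration:

// Shorthand range: file columns 0 through 10 become one float vector column (TrainRegression).
var regressionReader = mlContext.Data.CreateTextReader(new[] {
    new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),
    new TextLoader.Column("Target", DataKind.R4, 11)
});

// The same idea written out with TextLoader.Range (ReadDataDynamic).
var dynamicReader = mlContext.Data.CreateTextReader(new[] {
    new TextLoader.Column("FeatureVector", DataKind.R4, new[] { new TextLoader.Range(0, 9) }),
    new TextLoader.Column("Target", DataKind.R4, 10)
});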
@@ -27,7 +27,7 @@
{
var ml = new MLContext(seed: 1, conc: 1);

var data = ml.Data.TextReader(MakeSentimentColumns(), hasHeader: true).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
var data = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
// Pipeline.
var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: (s) => { s.ConvergenceTolerance = 1f; s.NumThreads = 1; }));