
Commit

removed ReadFile, only kept MlContext ReadFromTextFile
artidoro committed Dec 6, 2018
1 parent 490e03f commit 9d182f3
Showing 33 changed files with 70 additions and 85 deletions.
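In practice, call sites stop using the static TextLoader.ReadFile helpers and go through the MLContext data catalog instead. A rough before/after sketch, based only on the signatures visible in this diff; env, dataPath, and the single Label column are placeholders:

// Assumes the usings these files already have (Microsoft.ML, Microsoft.ML.Runtime.Data at this point in the repo's history).
// Before this commit: static helper on TextLoader (removed below).
IDataView before = TextLoader.ReadFile(env, new MultiFileSource(dataPath),
    columns: new[] { new TextLoader.Column("Label", DataKind.BL, 0) });

// After this commit: go through the MLContext data operations catalog.
var mlContext = new MLContext();
IDataView after = mlContext.Data.ReadFromTextFile(dataPath,
    columns: new[] { new TextLoader.Column("Label", DataKind.BL, 0) });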
@@ -19,7 +19,7 @@ public static void FeatureContributionCalculationTransform_Regression()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
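In this sample and the similar ones below, only the factory method name changes (TextReader becomes CreateTextReader); defining and using the reader is unchanged. A minimal sketch using the first column shown above; the data path and hasHeader value are placeholders:

var mlContext = new MLContext();
var reader = mlContext.Data.CreateTextReader(
    columns: new[]
    {
        new TextLoader.Column("MedianHomeValue", DataKind.R4, 0)
        // ...remaining columns as in the sample...
    },
    hasHeader: true);
// The reader is then pointed at the sample's data file, exactly as before the rename.
IDataView data = reader.Read(dataFile);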
@@ -31,7 +31,7 @@ public static void FeatureSelectionTransform()

// First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from
// all the feature columns into entries of a vector of a single column named "Features".
var reader = ml.Data.TextReader(
var reader = ml.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
@@ -19,7 +19,7 @@ public static void RunExample()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
@@ -22,7 +22,7 @@ public static void PFI_Regression()
// First, we define the reader: specify the data columns and where to find them in the text file.
// The data file is composed of rows of data, with each row having 11 numerical columns
// separated by whitespace.
var reader = mlContext.Data.TextReader(
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
// Read the first column (indexed by 0) in the data file as an R4 (float)
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs
@@ -24,7 +24,7 @@ public static void SDCA_BinaryClassification()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("Sentiment", DataKind.BL, 0),
20 changes: 0 additions & 20 deletions src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
@@ -1316,26 +1316,6 @@ internal static IDataLoader Create(IHostEnvironment env, ModelLoadContext ctx, I
internal static IDataLoader Create(IHostEnvironment env, Arguments args, IMultiStreamSource files)
=> (IDataLoader)new TextLoader(env, args, files).Read(files);

/// <summary>
/// Creates a <see cref="TextLoader"/> and uses it to read a specified file.
/// </summary>
/// <param name="env">The environment to use.</param>
/// <param name="columns">Defines a mapping between input columns in the file and IDataView columns.</param>
/// <param name="hasHeader">Whether the file has a header.</param>
/// <param name="separatorChar"> The character used as separator between data points in a row. By default the tab character is used as separator.</param>
/// <param name="fileSource">Specifies a file from which to read.</param>
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Column[] columns, bool hasHeader = false, char separatorChar = '\t')
=> new TextLoader(env, columns, hasHeader, separatorChar, fileSource).Read(fileSource);

/// <summary>
/// Loads a text file into an <see cref="IDataView"/>. Supports basic mapping from input columns to IDataView columns.
/// </summary>
/// <param name="env">The environment to use.</param>
/// <param name="fileSource">Specifies a file from which to read.</param>
/// <param name="args">Defines the settings of the load operation.</param>
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Arguments args = null)
=> new TextLoader(env, args, fileSource).Read(fileSource);

public void Save(ModelSaveContext ctx)
{
_host.CheckValue(ctx, nameof(ctx));
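With the static helpers gone, internal call sites construct a TextLoader directly and call Read on it, as the later hunks in this commit show. Roughly, reusing the Term column and fileSource from the ValueToKeyMapping hunk below:

var loader = new TextLoader(env,
    columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) },
    dataSample: fileSource);
IDataView termData = loader.Read(fileSource);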
@@ -21,7 +21,7 @@ public static class TextLoaderSaverCatalog
/// <param name="hasHeader">Whether the file has a header.</param>
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
/// <param name="dataSample">The optional location of a data sample.</param>
public static TextLoader TextReader(this DataOperations catalog,
public static TextLoader CreateTextReader(this DataOperations catalog,
Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null)
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChar, dataSample);

@@ -31,7 +31,7 @@ public static TextLoader TextReader(this DataOperations catalog,
/// <param name="catalog">The catalog.</param>
/// <param name="args">Defines the settings of the load operation.</param>
/// <param name="dataSample">Allows to expose items that can be used for reading.</param>
public static TextLoader TextReader(this DataOperations catalog, Arguments args, IMultiStreamSource dataSample = null)
public static TextLoader CreateTextReader(this DataOperations catalog, Arguments args, IMultiStreamSource dataSample = null)
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), args, dataSample);

/// <summary>
@@ -62,7 +62,7 @@ public static IDataView ReadFromTextFile(this DataOperations catalog,
/// <param name="catalog">The catalog.</param>
/// <param name="path">Specifies a file from which to read.</param>
/// <param name="args">Defines the settings of the load operation.</param>
public static IDataView ReadFromTextFile(this DataOperations catalog, string path, Arguments args)
public static IDataView ReadFromTextFile(this DataOperations catalog, string path, Arguments args = null)
{
Contracts.CheckNonEmpty(path, nameof(path));

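With args now optional, callers that want the loader's default Arguments (and its auto-schema parsing, which several updated tests below rely on) can pass just the path. A sketch; mlContext and path are placeholders, and the Arguments fields shown are only the ones that appear elsewhere in this commit:

// Default Arguments: the schema is parsed automatically.
IDataView data = mlContext.Data.ReadFromTextFile(path);

// Explicit settings are passed the same way as before.
var args = new TextLoader.Arguments { AllowQuoting = false, AllowSparse = false };
IDataView data2 = mlContext.Data.ReadFromTextFile(path, args);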
@@ -483,9 +483,10 @@ private static TermMap CreateFileTermMap(IHostEnvironment env, IChannel ch, stri
"{0} should not be specified when default loader is TextLoader. Ignoring {0}={1}",
nameof(Arguments.TermsColumn), src);
}
termData = TextLoader.ReadFile(env, fileSource,
columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) }
);
termData = new TextLoader(env,
columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) },
dataSample: fileSource)
.Read(fileSource);
src = "Term";
autoConvert = true;
}
3 changes: 2 additions & 1 deletion src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs
@@ -283,7 +283,8 @@ public static IEnumerable<KeyValuePair<ColumnRole, string>> LoadRoleMappingsOrNu
{
// REVIEW: Should really validate the schema here, and consider
// ignoring this stream if it isn't as expected.
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));
var loader = new TextLoader(env, dataSample: new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile))
.Read(new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));

using (var cursor = loader.GetRowCursor(c => true))
{
7 changes: 5 additions & 2 deletions src/Microsoft.ML.Transforms/TermLookupTransformer.cs
@@ -361,11 +361,14 @@ private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFacto
ulong max = ulong.MinValue;
try
{
var data = TextLoader.ReadFile(host, new MultiFileSource(filename), new[]
var data = new TextLoader(host, new[]
{
new TextLoader.Column("Term", DataKind.TX, 0),
new TextLoader.Column("Value", DataKind.TX, 1)
});
},
dataSample: new MultiFileSource(filename)
).Read(new MultiFileSource(filename));

using (var cursor = data.GetRowCursor(c => true))
{
var getTerm = cursor.GetGetter<ReadOnlyMemory<char>>(0);
@@ -86,7 +86,7 @@ public void TrainSentiment()
AllowQuoting = false,
AllowSparse = false
};
var loader = TextLoader.ReadFile(env, new MultiFileSource(_sentimentDataPath), arguments);
var loader = env.Data.ReadFromTextFile(_sentimentDataPath, arguments);

var text = TextFeaturizingEstimator.Create(env,
new TextFeaturizingEstimator.Arguments()
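The benchmark keeps its explicit TextLoader.Arguments and simply routes them through the catalog extension instead of the removed static helper. A sketch of that shape; only AllowQuoting and AllowSparse are visible in this hunk, and the remaining fields are assumptions shown for illustration:

var arguments = new TextLoader.Arguments
{
    // HasHeader and Column are assumed fields of TextLoader.Arguments, included only to show the shape.
    HasHeader = false,
    AllowQuoting = false,
    AllowSparse = false,
    Column = new[] { new TextLoader.Column("Label", DataKind.BL, 0) }
};
IDataView view = env.Data.ReadFromTextFile(_sentimentDataPath, arguments); // env is the benchmark's MLContext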
22 changes: 11 additions & 11 deletions test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
@@ -606,12 +606,12 @@ public void RankingLightGBMTest()
public void TestTreeEnsembleCombiner()
{
var dataPath = GetDataPath("breast-cancer.txt");
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
var dataView = ML.Data.ReadFromTextFile(dataPath);

var fastTrees = new IPredictorModel[3];
for (int i = 0; i < 3; i++)
{
fastTrees[i] = FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
{
FeatureColumn = "Features",
NumTrees = 5,
@@ -628,13 +628,13 @@ public void TestTreeEnsembleCombiner()
public void TestTreeEnsembleCombinerWithCategoricalSplits()
{
var dataPath = GetDataPath("adult.tiny.with-schema.txt");
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
var dataView = ML.Data.ReadFromTextFile(dataPath);

var cat = new OneHotEncodingEstimator(Env, "Categories", "Features").Fit(dataView).Transform(dataView);
var cat = new OneHotEncodingEstimator(ML, "Categories", "Features").Fit(dataView).Transform(dataView);
var fastTrees = new IPredictorModel[3];
for (int i = 0; i < 3; i++)
{
fastTrees[i] = FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
{
FeatureColumn = "Features",
NumTrees = 5,
@@ -729,35 +729,35 @@ private void CombineAndTestTreeEnsembles(IDataView idv, IPredictorModel[] fastTr
public void TestEnsembleCombiner()
{
var dataPath = GetDataPath("breast-cancer.txt");
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
var dataView = ML.Data.ReadFromTextFile(dataPath);

var predictors = new IPredictorModel[]
{
FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
{
FeatureColumn = "Features",
NumTrees = 5,
NumLeaves = 4,
LabelColumn = DefaultColumnNames.Label,
TrainingData = dataView
}).PredictorModel,
AveragedPerceptronTrainer.TrainBinary(Env, new AveragedPerceptronTrainer.Arguments()
AveragedPerceptronTrainer.TrainBinary(ML, new AveragedPerceptronTrainer.Arguments()
{
FeatureColumn = "Features",
LabelColumn = DefaultColumnNames.Label,
NumIterations = 2,
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel,
LogisticRegression.TrainBinary(Env, new LogisticRegression.Arguments()
LogisticRegression.TrainBinary(ML, new LogisticRegression.Arguments()
{
FeatureColumn = "Features",
LabelColumn = DefaultColumnNames.Label,
OptTol = 10e-4F,
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel,
LogisticRegression.TrainBinary(Env, new LogisticRegression.Arguments()
LogisticRegression.TrainBinary(ML, new LogisticRegression.Arguments()
{
FeatureColumn = "Features",
LabelColumn = DefaultColumnNames.Label,
@@ -775,7 +775,7 @@ public void TestEnsembleCombiner()
public void TestMultiClassEnsembleCombiner()
{
var dataPath = GetDataPath("breast-cancer.txt");
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
var dataView = ML.Data.ReadFromTextFile(dataPath);

var predictors = new IPredictorModel[]
{
@@ -439,7 +439,7 @@ protected bool SaveLoadText(IDataView view, IHostEnvironment env,

// Note that we don't pass in "args", but pass in a default args so we test
// the auto-schema parsing.
var loadedData = TextLoader.ReadFile(env, new MultiFileSource(pathData));
var loadedData = ML.Data.ReadFromTextFile(pathData);
if (!CheckMetadataTypes(loadedData.Schema))
Failed();

4 changes: 2 additions & 2 deletions test/Microsoft.ML.TestFramework/ModelHelper.cs
@@ -14,7 +14,7 @@ namespace Microsoft.ML.TestFramework
#pragma warning disable 612, 618
public static class ModelHelper
{
private static IHostEnvironment s_environment = new MLContext(seed: 1);
private static MLContext s_environment = new MLContext(seed: 1);
private static ITransformModel s_housePriceModel;

public static void WriteKcHousePriceModel(string dataPath, string outputModelPath)
@@ -41,7 +41,7 @@ public static void WriteKcHousePriceModel(string dataPath, Stream stream)

public static IDataView GetKcHouseDataView(string dataPath)
{
return Runtime.Data.TextLoader.ReadFile(s_environment, new MultiFileSource(dataPath),
return s_environment.Data.ReadFromTextFile(dataPath,
columns: new[]
{
new Runtime.Data.TextLoader.Column("Id", Runtime.Data.DataKind.TX, 0),
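The helper's field changes from IHostEnvironment to MLContext because ReadFromTextFile hangs off the data operations catalog exposed as MLContext.Data, which a bare IHostEnvironment does not provide. A sketch of the updated helper, with the column list truncated as in the hunk:

private static MLContext s_environment = new MLContext(seed: 1);

public static IDataView GetKcHouseDataView(string dataPath)
{
    return s_environment.Data.ReadFromTextFile(dataPath,
        columns: new[]
        {
            new Runtime.Data.TextLoader.Column("Id", Runtime.Data.DataKind.TX, 0)
            // ...remaining columns unchanged from the original helper...
        });
}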
@@ -41,7 +41,7 @@ private void IntermediateData(string dataPath)
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// A boolean column depicting the 'label'.
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
// Three text columns.
@@ -91,7 +91,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 11 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),

@@ -152,7 +152,7 @@ private ITransformer TrainOnIris(string irisDataPath)

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
new TextLoader.Column("PetalLength", DataKind.R4, 2),
@@ -217,7 +217,7 @@ private void NormalizationWorkout(string dataPath)
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// The four features of the Iris dataset will be grouped together as one Features column.
new TextLoader.Column("Features", DataKind.R4, 0, 3),
// Label: kind of iris.
@@ -278,7 +278,7 @@ private void TextFeaturizationOn(string dataPath)
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[]
var reader = mlContext.Data.CreateTextReader(new[]
{
new TextLoader.Column("IsToxic", DataKind.BL, 0),
new TextLoader.Column("Message", DataKind.TX, 1),
@@ -345,7 +345,7 @@ private void CategoricalFeaturizationOn(params string[] dataPath)
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[]
var reader = mlContext.Data.CreateTextReader(new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
// We will load all the categorical features into one vector column of size 8.
@@ -406,7 +406,7 @@ private void CrossValidationOn(string dataPath)

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[]
var reader = mlContext.Data.CreateTextReader(new[]
{
// We read the first 11 values as a single float vector.
new TextLoader.Column("SepalLength", DataKind.R4, 0),
@@ -463,7 +463,7 @@ private void ReadDataDynamic(string dataPath)
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 10 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, new[] {new TextLoader.Range(0, 9)}),
// Separately, read the target variable.
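Two of the cookbook readers above load a contiguous block of file columns into a single vector-valued column; both spellings appear in the hunks. A sketch of each, with the trailing Target column name and index assumed for illustration:

// Shorthand range: file columns 0 through 10 become one float vector column (TrainRegression).
var regressionReader = mlContext.Data.CreateTextReader(new[] {
    new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),
    new TextLoader.Column("Target", DataKind.R4, 11)
});

// The same idea written out with TextLoader.Range (ReadDataDynamic).
var dynamicReader = mlContext.Data.CreateTextReader(new[] {
    new TextLoader.Column("FeatureVector", DataKind.R4, new[] { new TextLoader.Range(0, 9) }),
    new TextLoader.Column("Target", DataKind.R4, 10)
});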
@@ -27,7 +27,7 @@
{
var ml = new MLContext(seed: 1, conc: 1);

var data = ml.Data.TextReader(MakeSentimentColumns(), hasHeader: true).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
var data = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
// Pipeline.
var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: (s) => { s.ConvergenceTolerance = 1f; s.NumThreads = 1; }));