Skip to content

Commit

Permalink
Clean up of TextLoader constructor (#1784)
Browse files Browse the repository at this point in the history
  • Loading branch information
artidoro authored Dec 7, 2018
1 parent 2c87b19 commit 14c7a47
Show file tree
Hide file tree
Showing 42 changed files with 344 additions and 407 deletions.
107 changes: 46 additions & 61 deletions docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ This is how you can read this data:
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// A boolean column depicting the 'target label'.
IsOver50K: ctx.LoadBool(0),
// Three text columns.
Expand All @@ -115,9 +115,7 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
{
Column = new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// A boolean column depicting the 'label'.
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
// Three text columns.
Expand All @@ -126,8 +124,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
},
// First line of the file is a header, not a data row.
HasHeader = true
});
hasHeader: true
);

// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
var data = reader.Read(dataPath);
Expand Down Expand Up @@ -155,7 +153,7 @@ This is how you can read this data:
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// A boolean column depicting the 'target label'.
IsOver50K: ctx.LoadBool(14),
// Three text columns.
Expand All @@ -175,19 +173,17 @@ The code is very similar using the dynamic API:
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
{
Column = new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// A boolean column depicting the 'label'.
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
// Three text columns.
new TextLoader.Column("Workclass", DataKind.TX, 1),
new TextLoader.Column("Education", DataKind.TX, 2),
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
},
// First line of the file is a header, not a data row.
HasHeader = true
});
hasHeader: true
);

var data = reader.Read(exampleFile1, exampleFile2);
```
Expand All @@ -211,7 +207,7 @@ Reading this file using `TextLoader`:
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// We read the first 11 values as a single float vector.
FeatureVector: ctx.LoadFloat(0, 10),
// Separately, read the target variable.
Expand All @@ -233,7 +229,7 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 10 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, new[] {new TextLoader.Range(0, 9)}),
// Separately, read the target variable.
Expand Down Expand Up @@ -302,7 +298,7 @@ Label Workclass education marital-status
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// A boolean column depicting the 'target label'.
IsOver50K: ctx.LoadBool(0),
// Three text columns.
Expand Down Expand Up @@ -365,19 +361,17 @@ You can also use the dynamic API to create the equivalent of the previous pipeli
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
{
Column = new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// A boolean column depicting the 'label'.
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
// Three text columns.
new TextLoader.Column("Workclass", DataKind.TX, 1),
new TextLoader.Column("Education", DataKind.TX, 2),
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
},
// First line of the file is a header, not a data row.
HasHeader = true
});
hasHeader: true
);

// Start creating our processing pipeline. For now, let's just concatenate all the text columns
// together into one.
Expand Down Expand Up @@ -428,7 +422,7 @@ var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// We read the first 11 values as a single float vector.
FeatureVector: ctx.LoadFloat(0, 10),
// Separately, read the target variable.
Expand Down Expand Up @@ -482,20 +476,18 @@ var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
{
Column = new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 11 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),

// Separately, read the target variable.
new TextLoader.Column("Target", DataKind.R4, 11),
},
// First line of the file is a header, not a data row.
HasHeader = true,
hasHeader: true,
// Default separator is tab, but we need a semicolon.
Separator = ";"
});
separatorChar: ';'
);

// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
var trainData = reader.Read(trainDataPath);
Expand Down Expand Up @@ -603,7 +595,7 @@ var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
Expand Down Expand Up @@ -653,9 +645,7 @@ var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
{
Column = new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
new TextLoader.Column("PetalLength", DataKind.R4, 2),
Expand All @@ -664,8 +654,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
new TextLoader.Column("Label", DataKind.TX, 4),
},
// The default separator is a tab, but this dataset is comma-separated.
Separator = ","
});
separatorChar: ','
);

// Retrieve the training data.
var trainData = reader.Read(irisDataPath);
Expand Down Expand Up @@ -821,7 +811,7 @@ var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
Expand Down Expand Up @@ -917,7 +907,7 @@ Here's a snippet of code that demonstrates normalization in learning pipelines.
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset will be grouped together as one Features column.
Features: ctx.LoadFloat(0, 3),
// Label: kind of iris.
Expand Down Expand Up @@ -952,17 +942,15 @@ You can achieve the same results using the dynamic API.
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
{
Column = new[] {
var reader = mlContext.Data.CreateTextReader(new[] {
// The four features of the Iris dataset will be grouped together as one Features column.
new TextLoader.Column("Features", DataKind.R4, 0, 3),
// Label: kind of iris.
new TextLoader.Column("Label", DataKind.TX, 4),
},
// The default separator is a tab, but this dataset is comma-separated.
Separator = ","
});
separatorChar: ','
);

// Read the training data.
var trainData = reader.Read(dataPath);
Expand Down Expand Up @@ -1011,7 +999,7 @@ Label Workclass education marital-status occupation relationship ethnicity sex n
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
Label: ctx.LoadBool(0),
// We will load all the categorical features into one vector column of size 8.
CategoricalFeatures: ctx.LoadText(1, 8),
Expand Down Expand Up @@ -1073,9 +1061,8 @@ You can achieve the same results using the dynamic API.
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
{
Column = new[] {
var reader = mlContext.Data.CreateTextReader(new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
// We will load all the categorical features into one vector column of size 8.
new TextLoader.Column("CategoricalFeatures", DataKind.TX, 1, 8),
Expand All @@ -1084,8 +1071,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
// Let's also separately load the 'Workclass' column.
new TextLoader.Column("Workclass", DataKind.TX, 1),
},
HasHeader = true
});
hasHeader: true
);

// Read the data.
var data = reader.Read(dataPath);
Expand Down Expand Up @@ -1157,7 +1144,7 @@ Sentiment SentimentText
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
IsToxic: ctx.LoadBool(0),
Message: ctx.LoadText(1)
), hasHeader: true);
Expand Down Expand Up @@ -1207,14 +1194,13 @@ You can achieve the same results using the dynamic API.
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
{
Column = new[] {
var reader = mlContext.Data.CreateTextReader(new[]
{
new TextLoader.Column("IsToxic", DataKind.BL, 0),
new TextLoader.Column("Message", DataKind.TX, 1),
},
HasHeader = true
});
hasHeader: true
);

// Read the data.
var data = reader.Read(dataPath);
Expand Down Expand Up @@ -1274,7 +1260,7 @@ var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
Expand Down Expand Up @@ -1330,9 +1316,8 @@ var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
{
Column = new[] {
var reader = mlContext.Data.CreateTextReader(new[]
{
// The four features of the Iris dataset.
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
Expand All @@ -1342,8 +1327,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
new TextLoader.Column("Label", DataKind.TX, 4),
},
// The default separator is a tab, but this dataset is comma-separated.
Separator = ","
});
separatorChar: ','
);

// Read the data.
var data = reader.Read(dataPath);
Expand Down Expand Up @@ -1395,7 +1380,7 @@ var mlContext = new MLContext();

// Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(ctx => (
var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,8 @@ public static void FeatureContributionCalculationTransform_Regression()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
{
Separator = "tab",
HasHeader = true,
Column = new[]
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
Expand All @@ -37,8 +34,9 @@ public static void FeatureContributionCalculationTransform_Regression()
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
new TextLoader.Column("TaxRate", DataKind.R4, 10),
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
}
});
},
hasHeader: true
);

// Read the data
var data = reader.Read(dataFile);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,14 @@ public static void FeatureSelectionTransform()

// First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from
// all the feature columns into entries of a vector of a single column named "Features".
var reader = ml.Data.TextReader(new TextLoader.Arguments()
{
Separator = "tab",
HasHeader = true,
Column = new[]
var reader = ml.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
new TextLoader.Column("Features", DataKind.Num, new [] { new TextLoader.Range(1, 9) })
}
});
},
hasHeader: true
);

// Then, we use the reader to read the data as an IDataView.
var data = reader.Read(dataFilePath);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,8 @@ public static void RunExample()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
{
Separator = "tab",
HasHeader = true,
Column = new[]
var reader = mlContext.Data.CreateTextReader(
columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
Expand All @@ -37,8 +34,9 @@ public static void RunExample()
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
new TextLoader.Column("TaxRate", DataKind.R4, 10),
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
}
});
},
hasHeader: true
);

// Read the data
var data = reader.Read(dataFile);
Expand Down
Loading

0 comments on commit 14c7a47

Please sign in to comment.