Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding samples for data save and load from text and binary files #3745

Merged
merged 3 commits into from
May 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.ML;

namespace Samples.Dynamic
{
/// <summary>
/// Sample that saves an <see cref="IDataView"/> to a binary IDV file and loads it back.
/// </summary>
public static class SaveAndLoadFromBinary
{
    /// <summary>
    /// Builds a small in-memory data set, writes it to "data.idv", reloads the file
    /// and prints every loaded row to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
        // as a catalog of available operations and as the source of randomness.
        // Setting the seed to a fixed number in this example to make outputs deterministic.
        var mlContext = new MLContext(seed: 0);

        // Create a list of training data points.
        var dataPoints = new List<DataPoint>()
        {
            new DataPoint() { Label = 0, Features = 4 },
            new DataPoint() { Label = 0, Features = 5 },
            new DataPoint() { Label = 0, Features = 6 },
            new DataPoint() { Label = 1, Features = 8 },
            new DataPoint() { Label = 1, Features = 9 },
        };

        // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
        IDataView data = mlContext.Data.LoadFromEnumerable(dataPoints);

        // Create a FileStream object and write the IDataView to it as a binary IDV file.
        // The using block disposes the stream before the file is read back below.
        using (FileStream stream = new FileStream("data.idv", FileMode.Create))
        {
            mlContext.Data.SaveAsBinary(data, stream);
        }

        // Create an IDataView object by loading the binary IDV file.
        IDataView loadedData = mlContext.Data.LoadFromBinary("data.idv");

        // Inspect the data that is loaded from the previously saved binary file.
        var loadedDataEnumerable = mlContext.Data.CreateEnumerable<DataPoint>(loadedData, reuseRowObject: false);
        foreach (DataPoint row in loadedDataEnumerable)
        {
            Console.WriteLine($"{row.Label}, {row.Features}");
        }

        // Preview of the loaded data.
        // 0, 4
        // 0, 5
        // 0, 6
        // 1, 8
        // 1, 9
    }

    /// <summary>
    /// Example with label and feature values. A data set is a collection of such examples.
    /// </summary>
    private class DataPoint
    {
        /// <summary>Label value of the example.</summary>
        public float Label { get; set; }

        /// <summary>Feature value of the example.</summary>
        public float Features { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.ML;

namespace Samples.Dynamic
{
/// <summary>
/// Sample that saves an <see cref="IDataView"/> to a text file and loads it back.
/// </summary>
public static class SaveAndLoadFromText
{
    /// <summary>
    /// Builds a small in-memory data set, writes it to "data.tsv", reloads the file
    /// and prints every loaded row to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
        // as a catalog of available operations and as the source of randomness.
        // Setting the seed to a fixed number in this example to make outputs deterministic.
        var mlContext = new MLContext(seed: 0);

        // Create a list of training data points.
        var dataPoints = new List<DataPoint>()
        {
            new DataPoint() { Label = 0, Features = 4 },
            new DataPoint() { Label = 0, Features = 5 },
            new DataPoint() { Label = 0, Features = 6 },
            new DataPoint() { Label = 1, Features = 8 },
            new DataPoint() { Label = 1, Features = 9 },
        };

        // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
        IDataView data = mlContext.Data.LoadFromEnumerable(dataPoints);

        // Create a FileStream object and write the IDataView to it as a text file.
        // The using block disposes the stream before the file is read back below.
        using (FileStream stream = new FileStream("data.tsv", FileMode.Create))
        {
            mlContext.Data.SaveAsText(data, stream);
        }

        // Create an IDataView object by loading the text file.
        IDataView loadedData = mlContext.Data.LoadFromTextFile("data.tsv");

        // Inspect the data that is loaded from the previously saved text file.
        var loadedDataEnumerable = mlContext.Data.CreateEnumerable<DataPoint>(loadedData, reuseRowObject: false);
        foreach (DataPoint row in loadedDataEnumerable)
        {
            Console.WriteLine($"{row.Label}, {row.Features}");
        }

        // Preview of the loaded data.
        // 0, 4
        // 0, 5
        // 0, 6
        // 1, 8
        // 1, 9
    }

    /// <summary>
    /// Example with label and feature values. A data set is a collection of such examples.
    /// </summary>
    private class DataPoint
    {
        /// <summary>Label value of the example.</summary>
        public float Label { get; set; }

        /// <summary>Feature value of the example.</summary>
        public float Features { get; set; }
    }
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, IMult
/// </summary>
/// <param name="catalog">The catalog.</param>
/// <param name="path">The path to the file to load from.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LoadFromBinary](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs)]
/// ]]>
/// </format>
/// </example>
public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, string path)
{
Contracts.CheckNonEmpty(path, nameof(path));
Expand All @@ -54,6 +61,13 @@ public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, strin
/// <param name="data">The data view to save.</param>
/// <param name="stream">The stream to write to.</param>
/// <param name="keepHidden">Whether to keep hidden columns in the dataset.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[SaveAsBinary](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs)]
/// ]]>
/// </format>
/// </example>
public static void SaveAsBinary(this DataOperationsCatalog catalog, IDataView data, Stream stream,
bool keepHidden = false)
{
Expand Down
14 changes: 14 additions & 0 deletions src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@ public static IDataView LoadFromTextFile<TInput>(this DataOperationsCatalog cata
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="path">Specifies a file from which to load.</param>
/// <param name="options">Defines the settings of the load operation.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LoadFromTextFile](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs)]
/// ]]>
/// </format>
/// </example>
public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, string path,
TextLoader.Options options = null)
{
Expand All @@ -186,6 +193,13 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, str
/// <param name="schema">Whether to write the header comment with the schema.</param>
/// <param name="keepHidden">Whether to keep hidden columns in the dataset.</param>
/// <param name="forceDense">Whether to save columns in dense format even if they are sparse vectors.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[SaveAsText](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs)]
/// ]]>
/// </format>
/// </example>
public static void SaveAsText(this DataOperationsCatalog catalog,
IDataView data,
Stream stream,
Expand Down