diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs
new file mode 100644
index 0000000000..b6448ec857
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs
@@ -0,0 +1,58 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Microsoft.ML;
+
+namespace Samples.Dynamic
+{
+    public static class SaveAndLoadFromBinary
+    {
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Create a list of training data points.
+            var dataPoints = new List<DataPoint>()
+            {
+                new DataPoint(){ Label = 0, Features = 4},
+                new DataPoint(){ Label = 0, Features = 5},
+                new DataPoint(){ Label = 0, Features = 6},
+                new DataPoint(){ Label = 1, Features = 8},
+                new DataPoint(){ Label = 1, Features = 9},
+            };
+
+            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+            IDataView data = mlContext.Data.LoadFromEnumerable(dataPoints);
+
+            // Create a FileStream object and write the IDataView to it as a binary IDV file.
+            using (FileStream stream = new FileStream("data.idv", FileMode.Create))
+                mlContext.Data.SaveAsBinary(data, stream);
+
+            // Create an IDataView object by loading the binary IDV file.
+            IDataView loadedData = mlContext.Data.LoadFromBinary("data.idv");
+
+            // Inspect the data that is loaded from the previously saved binary file.
+            var loadedDataEnumerable = mlContext.Data.CreateEnumerable<DataPoint>(loadedData, reuseRowObject: false);
+            foreach (DataPoint row in loadedDataEnumerable)
+                Console.WriteLine($"{row.Label}, {row.Features}");
+
+            // Preview of the loaded data.
+            // 0, 4
+            // 0, 5
+            // 0, 6
+            // 1, 8
+            // 1, 9
+        }
+
+        // Example with label and feature values. A data set is a collection of such examples.
+        private class DataPoint
+        {
+            public float Label { get; set; }
+
+            public float Features { get; set; }
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs
new file mode 100644
index 0000000000..9918b736ce
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs
@@ -0,0 +1,59 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Microsoft.ML;
+
+namespace Samples.Dynamic
+{
+    public static class SaveAndLoadFromText
+    {
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Create a list of training data points.
+            var dataPoints = new List<DataPoint>()
+            {
+                new DataPoint(){ Label = 0, Features = 4},
+                new DataPoint(){ Label = 0, Features = 5},
+                new DataPoint(){ Label = 0, Features = 6},
+                new DataPoint(){ Label = 1, Features = 8},
+                new DataPoint(){ Label = 1, Features = 9},
+            };
+
+            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+            IDataView data = mlContext.Data.LoadFromEnumerable(dataPoints);
+
+            // Create a FileStream object and write the IDataView to it as a text file.
+            using (FileStream stream = new FileStream("data.tsv", FileMode.Create))
+                mlContext.Data.SaveAsText(data, stream);
+
+            // Create an IDataView object by loading the text file.
+            IDataView loadedData = mlContext.Data.LoadFromTextFile("data.tsv");
+
+            // Inspect the data that is loaded from the previously saved text file.
+            var loadedDataEnumerable = mlContext.Data.CreateEnumerable<DataPoint>(loadedData, reuseRowObject: false);
+            foreach (DataPoint row in loadedDataEnumerable)
+                Console.WriteLine($"{row.Label}, {row.Features}");
+
+            // Preview of the loaded data.
+            // 0, 4
+            // 0, 5
+            // 0, 6
+            // 1, 8
+            // 1, 9
+        }
+
+        // Example with label and feature values. A data set is a collection of such examples.
+        private class DataPoint
+        {
+            public float Label { get; set; }
+
+            public float Features { get; set; }
+        }
+    }
+}
+
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs index 91045b23e5..b4a48bd45a 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs @@ -37,6 +37,13 @@ public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, IMult /// /// The catalog. /// The path to the file to load from. + /// + /// + /// + /// + /// public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, string path) { Contracts.CheckNonEmpty(path, nameof(path)); @@ -54,6 +61,13 @@ public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, strin /// The data view to save. /// The stream to write to. /// Whether to keep hidden columns in the dataset. + /// + /// + /// + /// + /// public static void SaveAsBinary(this DataOperationsCatalog catalog, IDataView data, Stream stream, bool keepHidden = false) { diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index 6297529c6f..e510c6df29 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -164,6 +164,13 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog cata /// The catalog. 
/// Specifies a file from which to load. /// Defines the settings of the load operation. + /// + /// + /// + /// + /// public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, string path, TextLoader.Options options = null) { @@ -186,6 +193,13 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, str /// Whether to write the header comment with the schema. /// Whether to keep hidden columns in the dataset. /// Whether to save columns in dense format even if they are sparse vectors. + /// + /// + /// + /// + /// public static void SaveAsText(this DataOperationsCatalog catalog, IDataView data, Stream stream,