From c95dc9c0652a7e808b5061109dbb2e5304e89ea3 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 27 Feb 2019 14:37:30 -0800 Subject: [PATCH 1/3] Sample for ReplaceMissingValues. --- .../Transforms/ReplaceMissingValues.cs | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs new file mode 100644 index 0000000000..9774e6ae75 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs @@ -0,0 +1,38 @@ +using Microsoft.ML.Data; +using Microsoft.ML.SamplesUtils; + +namespace Microsoft.ML.Samples.Dynamic +{ + public static class ReplaceMissingValues + { + public static void Example() + { + // Creating the ML.Net IHostEnvironment object, needed for the pipeline. + var mlContext = new MLContext(); + + // Download the dataset file. + string dataFile = DatasetUtils.DownloadMslrWeb10k(); + + // Create the loader to load the data. + var loader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("GroupId", DataKind.String, 1), + new TextLoader.Column("Features", DataKind.Single, new[] { new TextLoader.Range(2, 138) }) + } + ); + + // Load the raw dataset. + var data = loader.Load(dataFile); + + // Create the featurization pipeline. First, hash the GroupId column. + var pipeline = mlContext.Transforms.Conversion.Hash("GroupId") + // Replace missing values in Features column with the default replacement value for its type. + .Append(mlContext.Transforms.ReplaceMissingValues("Features")); + + // Fit the pipeline and transform the dataset. 
+ var transformedData = pipeline.Fit(data).Transform(data); + } + } +} From c5fdbefd3bc0cb4df0ed86c04c342f68b144078a Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 27 Feb 2019 14:40:51 -0800 Subject: [PATCH 2/3] add link to samples in documentation. --- src/Microsoft.ML.Transforms/ExtensionsCatalog.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs index e57dab218e..6703876f8d 100644 --- a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs @@ -46,6 +46,13 @@ public static MissingValueIndicatorEstimator IndicateMissingValues(this Transfor /// Name of column to transform. If set to , the value of the will be used as source. /// If not provided, the will be replaced with the results of the transforms. /// The type of replacement to use as specified in + /// + /// + /// + /// + /// public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, From a3eae5e1886ef8ac9fc46800c8fb660f2b35817b Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 27 Feb 2019 14:45:30 -0800 Subject: [PATCH 3/3] More samples. 
--- .../ReplaceMissingValuesColumnOptions.cs | 38 +++++++++++++++++++ .../ExtensionsCatalog.cs | 7 ++++ 2 files changed, 45 insertions(+) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesColumnOptions.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesColumnOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesColumnOptions.cs new file mode 100644 index 0000000000..477bc8d649 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesColumnOptions.cs @@ -0,0 +1,38 @@ +using Microsoft.ML.Data; +using Microsoft.ML.SamplesUtils; +using Microsoft.ML.Transforms; + +namespace Microsoft.ML.Samples.Dynamic +{ + public static class ReplaceMissingValuesColumnOptions + { + public static void Example() + { + // Creating the ML.Net IHostEnvironment object, needed for the pipeline. + var mlContext = new MLContext(); + + // Download the dataset file. + string dataFile = DatasetUtils.DownloadMslrWeb10k(); + + // Create the loader to load the data. + var loader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("GroupId", DataKind.String, 1), + new TextLoader.Column("Features", DataKind.Single, new[] { new TextLoader.Range(2, 138) }) + } + ); + + // Load the raw dataset. + var data = loader.Load(dataFile); + // Create the featurization pipeline. First, hash the GroupId column. + var pipeline = mlContext.Transforms.Conversion.Hash("GroupId") + // Replace missing values in Features column using the Mean replacement mode. + .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingEstimator.ColumnOptions("Features", "Features", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean))); + + // Fit the pipeline and transform the dataset. 
+ var transformedData = pipeline.Fit(data).Transform(data); + } + } +} diff --git a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs index 6703876f8d..e42fee60f1 100644 --- a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs @@ -65,6 +65,13 @@ public static MissingValueReplacingEstimator ReplaceMissingValues(this Transform /// /// The transform extensions' catalog. /// The name of the columns to use, and per-column transformation configuraiton. + /// + /// + /// + /// + /// public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params MissingValueReplacingEstimator.ColumnOptions[] columns) => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), columns); }