diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs new file mode 100644 index 0000000000..9774e6ae75 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs @@ -0,0 +1,38 @@ +using Microsoft.ML.Data; +using Microsoft.ML.SamplesUtils; + +namespace Microsoft.ML.Samples.Dynamic +{ + public static class ReplaceMissingValues + { + public static void Example() + { + // Create the MLContext; the loader and the transforms below are all obtained through it. + var mlContext = new MLContext(); + + // Fetch the sample data with DatasetUtils.DownloadMslrWeb10k; the returned string is handed to the loader below. + string dataFile = DatasetUtils.DownloadMslrWeb10k(); + + // Create the text loader: column 0 is Label, column 1 is GroupId, and columns 2 through 138 are read together as Features. + var loader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("GroupId", DataKind.String, 1), + new TextLoader.Column("Features", DataKind.Single, new[] { new TextLoader.Range(2, 138) }) + } + ); + + // Load the raw dataset. + var data = loader.Load(dataFile); + + // Create the featurization pipeline. First, hash the GroupId column. + var pipeline = mlContext.Transforms.Conversion.Hash("GroupId") + // Replace missing values in Features column with the default replacement value for its type (no replacement kind is passed, so the overload's default applies). + .Append(mlContext.Transforms.ReplaceMissingValues("Features")); + + // Fit the pipeline on the loaded data, then transform that same data. 
+ var transformedData = pipeline.Fit(data).Transform(data); + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesColumnOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesColumnOptions.cs new file mode 100644 index 0000000000..477bc8d649 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValuesColumnOptions.cs @@ -0,0 +1,38 @@ +using Microsoft.ML.Data; +using Microsoft.ML.SamplesUtils; +using Microsoft.ML.Transforms; + +namespace Microsoft.ML.Samples.Dynamic +{ + public static class ReplaceMissingValuesColumnOptions + { + public static void Example() + { + // Create the MLContext; the loader and the transforms below are all obtained through it. + var mlContext = new MLContext(); + + // Fetch the sample data with DatasetUtils.DownloadMslrWeb10k; the returned string is handed to the loader below. + string dataFile = DatasetUtils.DownloadMslrWeb10k(); + + // Create the text loader: column 0 is Label, column 1 is GroupId, and columns 2 through 138 are read together as Features. + var loader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("GroupId", DataKind.String, 1), + new TextLoader.Column("Features", DataKind.Single, new[] { new TextLoader.Range(2, 138) }) + } + ); + + // Load the raw dataset. + var data = loader.Load(dataFile); + // Create the featurization pipeline. First, hash the GroupId column. + var pipeline = mlContext.Transforms.Conversion.Hash("GroupId") + // Replace missing values in the Features column, writing the result back to Features, using the Mean replacement mode set explicitly through ColumnOptions. + .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingEstimator.ColumnOptions("Features", "Features", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean))); + + // Fit the pipeline on the loaded data, then transform that same data. 
+ var transformedData = pipeline.Fit(data).Transform(data); + } + } +} diff --git a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs index e57dab218e..e42fee60f1 100644 --- a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs @@ -46,6 +46,13 @@ public static MissingValueIndicatorEstimator IndicateMissingValues(this Transfor /// Name of column to transform. If set to , the value of the will be used as source. /// If not provided, the will be replaced with the results of the transforms. /// The type of replacement to use as specified in + /// + /// + /// + /// + /// public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, @@ -58,6 +65,13 @@ public static MissingValueReplacingEstimator ReplaceMissingValues(this Transform /// /// The transform extensions' catalog. /// The name of the columns to use, and per-column transformation configuraiton. + /// + /// + /// + /// + /// public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params MissingValueReplacingEstimator.ColumnOptions[] columns) => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), columns); }