From daeba69ff872dc0e9f6e5f40cf98c305c74481bc Mon Sep 17 00:00:00 2001
From: Gani Nazirov
Date: Wed, 27 Feb 2019 15:10:45 -0800
Subject: [PATCH] OneHotEncoding sample

---
 .../Dynamic/OneHotEncodingTransform.cs | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/OneHotEncodingTransform.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/OneHotEncodingTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/OneHotEncodingTransform.cs
new file mode 100644
index 0000000000..e0abc039e9
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/OneHotEncodingTransform.cs
@@ -0,0 +1,62 @@
+using System;
+using System.Collections.Generic;
+using Microsoft.ML.Data;
+using static Microsoft.ML.Transforms.OneHotEncodingTransformer;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+    public static class OneHotEncodingTransform
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var ml = new MLContext();
+
+            // Get a small dataset as an enumerable (element type inferred) and convert it to an IDataView.
+            var data = SamplesUtils.DatasetUtils.GetInfertData();
+            var trainData = ml.Data.LoadFromEnumerable(data);
+
+            // Preview of the data.
+            //
+            // Age    Case  Education  Induced  Parity  PooledStratum  RowNum  ...
+            // 26     1     0-5yrs     1        6       3              1       ...
+            // 42     1     0-5yrs     1        1       1              2       ...
+            // 39     1     0-5yrs     2        6       4              3       ...
+            // 34     1     0-5yrs     2        4       2              4       ...
+            // 35     1     6-11yrs    1        3       32             5       ...
+
+            // A pipeline for one hot encoding the Education column into a new EducationOneHotEncoded column.
+            var pipeline = ml.Transforms.Categorical.OneHotEncoding("EducationOneHotEncoded", "Education", OutputKind.Bag);
+            // Fit to data.
+            var transformer = pipeline.Fit(trainData);
+
+            // Apply the fitted transformer to get the transformed data.
+            var transformedData = transformer.Transform(trainData);
+
+            // Getting the data of the newly created column, so we can preview it.
+            var encodedColumn = transformedData.GetColumn<float[]>(ml, "EducationOneHotEncoded");
+
+            // A small printing utility: one row per line, values separated by spaces. colName is accepted but not printed.
+            Action<string, IEnumerable<float[]>> printHelper = (colName, column) =>
+            {
+                foreach (var row in column)
+                {
+                    for (var i = 0; i < row.Length; i++)
+                        Console.Write($"{row[i]} ");
+                    Console.WriteLine();
+                }
+            };
+
+            printHelper("Education", encodedColumn);
+
+            // Data column obtained post-transformation.
+            // 1 0 0 0 ...
+            // 1 0 0 0 ...
+            // 1 0 0 0 ...
+            // 1 0 0 0 ...
+            // 0 1 0 0 ...
+            // ....
+        }
+    }
+}