From dabc37635c9a5adeb5fac8865389cb832fe58ee4 Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Wed, 13 Mar 2019 11:19:33 -0700 Subject: [PATCH 1/8] internalizing of extension that contain columnoptions --- .../ConversionsExtensionsCatalog.cs | 27 ++++++++++++------- .../Transforms/ExtensionsCatalog.cs | 6 +++-- .../ExtensionsCatalog.cs | 15 +++++++---- .../MklComponentsCatalog.cs | 3 ++- src/Microsoft.ML.PCA/PCACatalog.cs | 3 ++- .../CategoricalCatalog.cs | 9 ++++--- .../ConversionsCatalog.cs | 3 ++- .../ExtensionsCatalog.cs | 6 +++-- .../FeatureSelectionCatalog.cs | 6 +++-- src/Microsoft.ML.Transforms/KernelCatalog.cs | 3 ++- .../NormalizerCatalog.cs | 9 ++++--- .../Text/TextCatalog.cs | 18 ++++++++----- 12 files changed, 72 insertions(+), 36 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index afa5c5ff29..40e01869e5 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -37,7 +37,8 @@ public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms /// /// The conversion transform's catalog. /// Description of dataset columns and how to process them. - public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashingEstimator.ColumnOptions[] columns) => new HashingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -62,7 +63,8 @@ public static TypeConvertingEstimator ConvertType(this TransformsCatalog.Convers /// /// The conversion transform's catalog. /// Description of dataset columns and how to process them. 
- public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, params TypeConvertingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, params TypeConvertingEstimator.ColumnOptions[] columns) => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -92,7 +94,8 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co /// [!code-csharp[KeyToValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)] /// ]]> /// - public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns) + [BestFriend] + internal static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns) => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); /// @@ -100,7 +103,8 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co /// /// The conversion transform's catalog. /// The input column to map back to vectors. 
- public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog, + [BestFriend] + internal static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog, params KeyToVectorMappingEstimator.ColumnOptions[] columns) => new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns); @@ -154,7 +158,8 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// ]]> /// /// - public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog, + [BestFriend] + internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog, ValueToKeyMappingEstimator.ColumnOptions[] columns, IDataView keyData = null) => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, keyData); @@ -178,7 +183,8 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs)] /// ]]> /// - public static ValueMappingEstimator MapValue( + [BestFriend] + internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, IEnumerable keys, IEnumerable values, @@ -203,7 +209,8 @@ public static ValueMappingEstimator MapValue /// - public static ValueMappingEstimator MapValue( + [BestFriend] + internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, IEnumerable keys, IEnumerable values, @@ -232,7 +239,8 @@ public static ValueMappingEstimator MapValue /// - public static ValueMappingEstimator MapValue( + [BestFriend] + internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, IEnumerable keys, IEnumerable values, @@ -258,7 +266,8 @@ public static ValueMappingEstimator MapValue /// - public static 
ValueMappingEstimator MapValue( + [BestFriend] + internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, IDataView lookupMap, string keyColumnName, string valueColumnName, params ColumnOptions[] columns) => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumnName, valueColumnName, diff --git a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs index 5e79e5cba3..d3cb5c0abc 100644 --- a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs @@ -12,7 +12,8 @@ namespace Microsoft.ML /// /// Specifies input and output column names for a transformation. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { private readonly string _outputColumnName; private readonly string _inputColumnName; @@ -77,7 +78,8 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, /// ]]> /// /// - public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params ColumnOptions[] columns) + [BestFriend] + internal static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params ColumnOptions[] columns) => new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); /// diff --git a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs index 6868e82c63..222367ef9e 100644 --- a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs @@ -18,7 +18,8 @@ public static class ImageEstimatorsCatalog /// [!code-csharp[ConvertToGrayscale](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayscale.cs)] /// ]]> /// - public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, 
params ColumnOptions[] columns) + [BestFriend] + internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params ColumnOptions[] columns) => new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); /// @@ -41,7 +42,8 @@ public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalo /// [!code-csharp[LoadImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs)] /// ]]> /// - public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params ColumnOptions[] columns) + [BestFriend] + internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params ColumnOptions[] columns) => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, ColumnOptions.ConvertToValueTuples(columns)); /// @@ -75,7 +77,8 @@ public static ImagePixelExtractingEstimator ExtractPixels(this TransformsCatalog /// /// The transform's catalog. /// The describing how the transform handles each image pixel extraction output input column pair. 
- public static ImagePixelExtractingEstimator ExtractPixels(this TransformsCatalog catalog, params ImagePixelExtractingEstimator.ColumnOptions[] columnOptions) + [BestFriend] + internal static ImagePixelExtractingEstimator ExtractPixels(this TransformsCatalog catalog, params ImagePixelExtractingEstimator.ColumnOptions[] columnOptions) => new ImagePixelExtractingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); /// @@ -133,7 +136,8 @@ public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog /// [!code-csharp[ResizeImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs)] /// ]]> /// - public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog, params ImageResizingEstimator.ColumnOptions[] columnOptions) + [BestFriend] + internal static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog, params ImageResizingEstimator.ColumnOptions[] columnOptions) => new ImageResizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); /// @@ -141,7 +145,8 @@ public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog /// /// The transform's catalog. /// The describing how the transform handles each vector to image conversion column pair. 
- public static VectorToImageConvertingEstimator ConvertToImage(this TransformsCatalog catalog, params VectorToImageConvertingEstimator.ColumnOptions[] columnOptions) + [BestFriend] + internal static VectorToImageConvertingEstimator ConvertToImage(this TransformsCatalog catalog, params VectorToImageConvertingEstimator.ColumnOptions[] columnOptions) => new VectorToImageConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); /// diff --git a/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs b/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs index 7ba5c81031..85f6b22c28 100644 --- a/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs +++ b/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs @@ -165,7 +165,8 @@ public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog catal /// ]]> /// /// - public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog catalog, params VectorWhiteningEstimator.ColumnOptions[] columns) + [BestFriend] + internal static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog catalog, params VectorWhiteningEstimator.ColumnOptions[] columns) => new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), columns); } diff --git a/src/Microsoft.ML.PCA/PCACatalog.cs b/src/Microsoft.ML.PCA/PCACatalog.cs index 2cf6f5e1bb..3c1aafe6f7 100644 --- a/src/Microsoft.ML.PCA/PCACatalog.cs +++ b/src/Microsoft.ML.PCA/PCACatalog.cs @@ -35,7 +35,8 @@ public static PrincipalComponentAnalyzer ProjectToPrincipalComponents(this Trans /// Initializes a new instance of . /// The transform's catalog. /// Input columns to apply PrincipalComponentAnalysis on. 
- public static PrincipalComponentAnalyzer ProjectToPrincipalComponents(this TransformsCatalog catalog, params PrincipalComponentAnalyzer.ColumnOptions[] columns) + [BestFriend] + internal static PrincipalComponentAnalyzer ProjectToPrincipalComponents(this TransformsCatalog catalog, params PrincipalComponentAnalyzer.ColumnOptions[] columns) => new PrincipalComponentAnalyzer(CatalogUtils.GetEnvironment(catalog), columns); /// diff --git a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs index 6d17d4ed78..1c353657f7 100644 --- a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs +++ b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs @@ -31,7 +31,8 @@ public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Cate /// /// The transform catalog /// The column settings. - public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, + [BestFriend] + internal static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, params OneHotEncodingEstimator.ColumnOptions[] columns) => new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), columns); @@ -42,7 +43,8 @@ public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Cate /// The column settings. /// Specifies an ordering for the encoding. If specified, this should be a single column data view, /// and the key-values will be taken from that column. If unspecified, the ordering will be determined from the input data upon fitting. 
- public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, + [BestFriend] + internal static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, OneHotEncodingEstimator.ColumnOptions[] columns, IDataView keyData = null) => new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), columns, keyData); @@ -72,7 +74,8 @@ public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCata /// /// The transform catalog /// The column settings. - public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCatalog.CategoricalTransforms catalog, + [BestFriend] + internal static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCatalog.CategoricalTransforms catalog, params OneHotHashEncodingEstimator.ColumnOptions[] columns) => new OneHotHashEncodingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } diff --git a/src/Microsoft.ML.Transforms/ConversionsCatalog.cs b/src/Microsoft.ML.Transforms/ConversionsCatalog.cs index 6b4de2fbba..406cef8d2d 100644 --- a/src/Microsoft.ML.Transforms/ConversionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ConversionsCatalog.cs @@ -18,7 +18,8 @@ public static class ConversionsCatalog /// /// The categorical transform's catalog. /// Specifies the output and input columns on which the transformation should be applied. 
- public static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog, + [BestFriend] + internal static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns) => new KeyToBinaryVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); diff --git a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs index 56b1739034..2694d4cb8a 100644 --- a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs @@ -15,7 +15,8 @@ public static class ExtensionsCatalog /// /// The transform extensions' catalog. /// The names of the input columns of the transformation and the corresponding names for the output columns. - public static MissingValueIndicatorEstimator IndicateMissingValues(this TransformsCatalog catalog, + [BestFriend] + internal static MissingValueIndicatorEstimator IndicateMissingValues(this TransformsCatalog catalog, params ColumnOptions[] columns) => new MissingValueIndicatorEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); @@ -58,7 +59,8 @@ public static MissingValueReplacingEstimator ReplaceMissingValues(this Transform /// /// The transform extensions' catalog. /// The name of the columns to use, and per-column transformation configuraiton. 
- public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params MissingValueReplacingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params MissingValueReplacingEstimator.ColumnOptions[] columns) => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs index 6e15b9fa0b..3c59b738a4 100644 --- a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs +++ b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs @@ -25,7 +25,8 @@ public static class FeatureSelectionCatalog /// ]]> /// /// - public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog, + [BestFriend] + internal static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog, string labelColumnName = MutualInfoSelectDefaults.LabelColumn, int slotsInOutput = MutualInfoSelectDefaults.SlotsInOutput, int numberOfBins = MutualInfoSelectDefaults.NumBins, @@ -64,7 +65,8 @@ public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMu /// ]]> /// /// - public static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this TransformsCatalog.FeatureSelectionTransforms catalog, + [BestFriend] + internal static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this TransformsCatalog.FeatureSelectionTransforms catalog, params CountFeatureSelectingEstimator.ColumnOptions[] columns) => new CountFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), columns); diff --git a/src/Microsoft.ML.Transforms/KernelCatalog.cs b/src/Microsoft.ML.Transforms/KernelCatalog.cs index 52c2d2d072..8328476958 100644 --- 
a/src/Microsoft.ML.Transforms/KernelCatalog.cs +++ b/src/Microsoft.ML.Transforms/KernelCatalog.cs @@ -40,7 +40,8 @@ public static ApproximatedKernelMappingEstimator ApproximatedKernelMap(this Tran /// /// The transform's catalog. /// The input columns to use for the transformation. - public static ApproximatedKernelMappingEstimator ApproximatedKernelMap(this TransformsCatalog catalog, params ApproximatedKernelMappingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static ApproximatedKernelMappingEstimator ApproximatedKernelMap(this TransformsCatalog catalog, params ApproximatedKernelMappingEstimator.ColumnOptions[] columns) => new ApproximatedKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index dbd0e81178..64e500d6c5 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -40,7 +40,8 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// ]]> /// /// - public static NormalizingEstimator Normalize(this TransformsCatalog catalog, + [BestFriend] + internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, NormalizingEstimator.NormalizationMode mode, params ColumnOptions[] columns) => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), mode, ColumnOptions.ConvertToValueTuples(columns)); @@ -79,7 +80,8 @@ public static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalog /// /// The transform's catalog. /// Describes the parameters of the lp-normalization process for each column pair. 
- public static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalog catalog, params LpNormNormalizingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalog catalog, params LpNormNormalizingEstimator.ColumnOptions[] columns) => new LpNormNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -110,7 +112,8 @@ public static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(this Tr /// /// The transform's catalog. /// Describes the parameters of the gcn-normaliztion process for each column pair. - public static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(this TransformsCatalog catalog, params GlobalContrastNormalizingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(this TransformsCatalog catalog, params GlobalContrastNormalizingEstimator.ColumnOptions[] columns) => new GlobalContrastNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index ef56dbf065..240bbcea28 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -72,7 +72,8 @@ public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this /// and append another marker character, , to the end of the output vector of characters. /// Pairs of columns to run the tokenization on. 
- public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters, params ColumnOptions[] columns) => new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns)); @@ -146,7 +147,8 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T /// ]]> /// /// - public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.TextTransforms catalog, WordEmbeddingEstimator.PretrainedModelKind modelKind = WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding, params WordEmbeddingEstimator.ColumnOptions[] columns) => new WordEmbeddingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), modelKind, columns); @@ -170,7 +172,8 @@ public static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog.T /// /// The text-related transform's catalog. /// Pairs of columns to run the tokenization on. - public static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog.TextTransforms catalog, params WordTokenizingEstimator.ColumnOptions[] columns) => new WordTokenizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); @@ -210,7 +213,8 @@ public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Text /// /// The text-related transform's catalog. /// Pairs of columns to run the ngram process on. 
- public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.TextTransforms catalog, params NgramExtractingEstimator.ColumnOptions[] columns) => new NgramExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); @@ -407,7 +411,8 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T /// /// The text-related transform's catalog. /// Pairs of columns to compute n-grams. Note that gram indices are generated by hashing. - public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, NgramHashingEstimator.ColumnOptions[] columns) => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); @@ -455,7 +460,8 @@ public static LatentDirichletAllocationEstimator LatentDirichletAllocation(this /// /// The transform's catalog. /// Describes the parameters of LDA for each column pair. 
- public static LatentDirichletAllocationEstimator LatentDirichletAllocation( + [BestFriend] + internal static LatentDirichletAllocationEstimator LatentDirichletAllocation( this TransformsCatalog.TextTransforms catalog, params LatentDirichletAllocationEstimator.ColumnOptions[] columns) => new LatentDirichletAllocationEstimator(CatalogUtils.GetEnvironment(catalog), columns); From 52bad8c2337eb63d4eb8cb2a9879336717ca4d71 Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Wed, 13 Mar 2019 11:31:03 -0700 Subject: [PATCH 2/8] internalization of columnoptions --- src/Microsoft.ML.Data/Transforms/Hashing.cs | 3 ++- .../Transforms/KeyToVector.cs | 3 ++- .../Transforms/Normalizer.cs | 23 ++++++++++++------- .../Transforms/TypeConverting.cs | 3 ++- .../Transforms/ValueToKeyMappingEstimator.cs | 6 +++-- .../ImagePixelExtractor.cs | 3 ++- .../ImageResizer.cs | 3 ++- .../VectorToImageTransform.cs | 3 ++- .../VectorWhitening.cs | 3 ++- src/Microsoft.ML.PCA/PcaTransformer.cs | 3 ++- .../CountFeatureSelection.cs | 3 ++- .../HashJoiningTransform.cs | 3 ++- .../MissingValueReplacing.cs | 3 ++- src/Microsoft.ML.Transforms/OneHotEncoding.cs | 3 ++- .../OneHotHashEncoding.cs | 3 ++- .../RandomFourierFeaturizing.cs | 3 ++- .../Text/LdaTransform.cs | 3 ++- .../Text/NgramHashingTransformer.cs | 3 ++- .../Text/NgramTransform.cs | 3 ++- .../Text/StopWordsRemovingTransformer.cs | 3 ++- .../Text/WordEmbeddingsExtractor.cs | 3 ++- .../Text/WordTokenizing.cs | 3 ++- 22 files changed, 59 insertions(+), 30 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/Hashing.cs b/src/Microsoft.ML.Data/Transforms/Hashing.cs index 658930929e..dfc1b37028 100644 --- a/src/Microsoft.ML.Data/Transforms/Hashing.cs +++ b/src/Microsoft.ML.Data/Transforms/Hashing.cs @@ -1124,7 +1124,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. 
/// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs index f03d7758ea..7df837d5dc 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs @@ -736,7 +736,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 3e90ccb12d..15fed4b43b 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -64,7 +64,8 @@ public enum NormalizationMode SupervisedBinning = 4 } - public abstract class ColumnOptionsBase + [BestFriend] + internal abstract class ColumnOptionsBase { public readonly string Name; public readonly string InputColumnName; @@ -103,7 +104,7 @@ internal static ColumnOptionsBase Create(string outputColumnName, string inputCo } } - public abstract class ControlZeroColumnOptionsBase : ColumnOptionsBase + internal abstract class ControlZeroColumnOptionsBase : ColumnOptionsBase { public readonly bool EnsureZeroUntouched; @@ -114,7 +115,8 @@ private protected ControlZeroColumnOptionsBase(string outputColumnName, string i } } - public sealed class MinMaxColumnOptions : ControlZeroColumnOptionsBase + [BestFriend] + internal sealed class MinMaxColumnOptions : ControlZeroColumnOptionsBase { public MinMaxColumnOptions(string outputColumnName, string inputColumnName = null, long maximumExampleCount = Defaults.MaximumExampleCount, bool ensureZeroUntouched = Defaults.EnsureZeroUntouched) : base(outputColumnName, 
inputColumnName ?? outputColumnName, maximumExampleCount, ensureZeroUntouched) @@ -125,7 +127,8 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.MinMaxUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class MeanVarianceColumnOptions : ControlZeroColumnOptionsBase + [BestFriend] + internal sealed class MeanVarianceColumnOptions : ControlZeroColumnOptionsBase { public readonly bool UseCdf; @@ -140,7 +143,8 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.MeanVarUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class LogMeanVarianceColumnOptions : ColumnOptionsBase + [BestFriend] + internal sealed class LogMeanVarianceColumnOptions : ColumnOptionsBase { public readonly bool UseCdf; @@ -155,7 +159,8 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.LogMeanVarUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class BinningColumnOptions : ControlZeroColumnOptionsBase + [BestFriend] + internal sealed class BinningColumnOptions : ControlZeroColumnOptionsBase { public readonly int MaximumBinCount; @@ -170,7 +175,8 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.BinUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class SupervisedBinningColumOptions : ControlZeroColumnOptionsBase + [BestFriend] + internal sealed class SupervisedBinningColumOptions : ControlZeroColumnOptionsBase { public readonly int MaximumBinCount; public readonly string LabelColumnName; @@ -309,7 +315,8 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(NormalizingTransformer).Assembly.FullName); } - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { public readonly string Name; public readonly string 
InputColumnName; diff --git a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs index f1d0b9c226..0127b65860 100644 --- a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs +++ b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs @@ -527,7 +527,8 @@ internal sealed class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs index 2a56c14555..2ba93bf56e 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs @@ -41,7 +41,8 @@ public enum KeyOrdinality : byte /// /// Describes how the transformer handles one column pair. /// - public abstract class ColumnOptionsBase + [BestFriend] + internal abstract class ColumnOptionsBase { public readonly string OutputColumnName; public readonly string InputColumnName; @@ -71,7 +72,8 @@ private protected ColumnOptionsBase(string outputColumnName, string inputColumnN /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions : ColumnOptionsBase + [BestFriend] + internal sealed class ColumnOptions : ColumnOptionsBase { /// /// Describes how the transformer handles one column pair. 
diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs index 172dcdacd5..f45605e6f0 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs @@ -563,7 +563,8 @@ internal static void GetOrder(ColorsOrder order, ColorBits colors, out int a, ou /// /// Describes how the transformer handles one image pixel extraction column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs index 0bd386a529..3fd167127c 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs @@ -461,7 +461,8 @@ public enum Anchor : byte /// /// Describes how the transformer handles one image resize column. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of public readonly string Name; diff --git a/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs b/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs index 620faceedb..043e63b489 100644 --- a/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs @@ -450,7 +450,8 @@ internal static class Defaults /// /// Describes how the transformer handles one vector to image conversion column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . 
public readonly string Name; diff --git a/src/Microsoft.ML.Mkl.Components/VectorWhitening.cs b/src/Microsoft.ML.Mkl.Components/VectorWhitening.cs index ba488f43cd..4e2cffc6d0 100644 --- a/src/Microsoft.ML.Mkl.Components/VectorWhitening.cs +++ b/src/Microsoft.ML.Mkl.Components/VectorWhitening.cs @@ -683,7 +683,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.PCA/PcaTransformer.cs b/src/Microsoft.ML.PCA/PcaTransformer.cs index 986176dbd5..5abc171c96 100644 --- a/src/Microsoft.ML.PCA/PcaTransformer.cs +++ b/src/Microsoft.ML.PCA/PcaTransformer.cs @@ -631,7 +631,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs index 4e26d853d5..07b40ffee7 100644 --- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs @@ -48,7 +48,8 @@ internal sealed class Options : TransformInputBase /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . 
public readonly string Name; diff --git a/src/Microsoft.ML.Transforms/HashJoiningTransform.cs b/src/Microsoft.ML.Transforms/HashJoiningTransform.cs index 73a3797011..1589763adb 100644 --- a/src/Microsoft.ML.Transforms/HashJoiningTransform.cs +++ b/src/Microsoft.ML.Transforms/HashJoiningTransform.cs @@ -106,7 +106,8 @@ internal bool TryUnparse(StringBuilder sb) } } - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { // Either VBuffer> or a single Key. // Note that if CustomSlotMap contains only one array, the output type of the transform will a single Key. diff --git a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs index 03776277b3..3b5c6710c2 100644 --- a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs +++ b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs @@ -901,7 +901,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// The possible ways to replace missing values. diff --git a/src/Microsoft.ML.Transforms/OneHotEncoding.cs b/src/Microsoft.ML.Transforms/OneHotEncoding.cs index f3dc07e9db..9309c664e5 100644 --- a/src/Microsoft.ML.Transforms/OneHotEncoding.cs +++ b/src/Microsoft.ML.Transforms/OneHotEncoding.cs @@ -182,7 +182,8 @@ public enum OutputKind : byte /// /// Describes how the transformer handles one column pair. 
/// - public sealed class ColumnOptions : ValueToKeyMappingEstimator.ColumnOptionsBase + [BestFriend] + internal sealed class ColumnOptions : ValueToKeyMappingEstimator.ColumnOptionsBase { public readonly OutputKind OutputKind; /// diff --git a/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs b/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs index 2b96cc7ad4..41723d35ce 100644 --- a/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs +++ b/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs @@ -217,7 +217,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { public readonly HashingEstimator.ColumnOptions HashingOptions; public readonly OneHotEncodingEstimator.OutputKind OutputKind; diff --git a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs index 1be12e2453..3d661580b4 100644 --- a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs +++ b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs @@ -618,7 +618,8 @@ internal static class Defaults /// /// Describes how the transformer handles one Gcn column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index a66d410c7e..4f406de70c 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -1005,7 +1005,8 @@ internal LatentDirichletAllocationEstimator(IHostEnvironment env, params ColumnO /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . 
diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index 21337758ca..f268f68192 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -873,7 +873,8 @@ public sealed class NgramHashingEstimator : IEstimator /// /// Describes how the transformer handles one pair of mulitple inputs - singular output columns. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 6e0cfb35f3..71b015b35a 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -794,7 +794,8 @@ internal static bool IsSchemaColumnValid(SchemaShape.Column col) /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs index 2244ff58bc..7faf7db26d 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs @@ -493,7 +493,8 @@ public sealed class StopWordsRemovingEstimator : TrivialEstimator /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . 
public readonly string Name; diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs index 006ef05ee3..ad7e027e0e 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs @@ -829,7 +829,8 @@ public enum PretrainedModelKind /// /// Information for each column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs index b6278e824a..4f113c3e06 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs @@ -439,7 +439,8 @@ internal WordTokenizingEstimator(IHostEnvironment env, params ColumnOptions[] co : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(WordTokenizingEstimator)), new WordTokenizingTransformer(env, columns)) { } - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Output column name that will be used to store the tokenization result of column. 
From e63ad508e975d9f953a9476c9c1a3cd0a2889c67 Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Thu, 14 Mar 2019 14:27:29 -0700 Subject: [PATCH 3/8] update the extensions to make all arguments available --- .../ConversionsExtensionsCatalog.cs | 102 +++++++++++++++++- .../Transforms/ExtensionsCatalog.cs | 8 +- .../ExtensionsCatalog.cs | 37 +++++++ .../MklComponentsCatalog.cs | 1 - .../CategoricalCatalog.cs | 28 +++-- .../ExtensionsCatalog.cs | 14 ++- src/Microsoft.ML.Transforms/KernelCatalog.cs | 6 +- .../NormalizerCatalog.cs | 3 +- .../Text/TextCatalog.cs | 76 ++++++++++--- 9 files changed, 236 insertions(+), 39 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 40e01869e5..9322c503c6 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -48,6 +48,7 @@ internal static HashingEstimator Hash(this TransformsCatalog.ConversionTransform /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. /// The expected kind of the output column. + /// New key count, if we work with key type. /// /// /// /// public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null, - DataKind outputKind = ConvertDefaults.DefaultOutputKind) - => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, outputKind); + DataKind outputKind = ConvertDefaults.DefaultOutputKind, KeyCount outputKeyCount = null) + => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { new TypeConvertingEstimator.ColumnOptions(outputColumnName, outputKind, inputColumnName, outputKeyCount) }); /// /// Changes column type of the input column. 
@@ -129,6 +130,10 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog. /// Maximum number of keys to keep per column when auto-training. /// How items should be ordered when vectorized. If choosen they will be in the order encountered. /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). + /// Whether key value annotations should be text, regardless of the actual input type. + /// The data view containing the terms. If specified, this should be a single column data + /// view, and the key-values will be taken from that column. If unspecified, the key-values will be determined + /// from the input data upon fitting. /// /// /// new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality); + ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality, + bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText, + IDataView keyData = null) + => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), + new[] { new ValueToKeyMappingEstimator.ColumnOptions(outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText) }, keyData); /// /// Converts value types into , optionally loading the keys to use from . @@ -163,6 +171,37 @@ internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog. ValueToKeyMappingEstimator.ColumnOptions[] columns, IDataView keyData = null) => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, keyData); + /// + /// + /// + /// The key type. + /// The value type. + /// The conversion transform's catalog + /// The list of keys to use for the mapping. The mapping is 1-1 with . The length of this list must be the same length as and + /// cannot contain duplicate keys.
+ /// The list of values to pair with the keys for the mapping. The length of this list must be equal to the same length as . + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Whether to treat the values as a . + /// An instance of the + /// + /// + /// + /// + public static ValueMappingEstimator MapValue( + this TransformsCatalog.ConversionTransforms catalog, + IEnumerable keys, + IEnumerable values, + string outputColumnName, + string inputColumnName = null, + bool treatValuesAsKeyType = false) + => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + /// /// /// @@ -219,6 +258,36 @@ internal static ValueMappingEstimator MapValue new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, ColumnOptions.ConvertToValueTuples(columns)); + /// + /// + /// + /// The key type. + /// The value type. + /// The conversion transform's catalog + /// The list of keys to use for the mapping. The mapping is 1-1 with . The length of this list must be the same length as and + /// cannot contain duplicate keys. + /// The list of values to pair with the keys for the mapping of TOutputType[]. The length of this list must be equal to the same length as . + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// An instance of the + /// + /// + /// + /// + public static ValueMappingEstimator MapValue( + this TransformsCatalog.ConversionTransforms catalog, + IEnumerable keys, + IEnumerable values, + string outputColumnName, + string inputColumnName = null) + => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, + new[] { (outputColumnName, inputColumnName ?? 
outputColumnName) }); + /// /// /// @@ -248,6 +317,31 @@ internal static ValueMappingEstimator MapValue new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, ColumnOptions.ConvertToValueTuples(columns)); + /// + /// + /// + /// The conversion transform's catalog + /// An instance of that contains the key and value columns. + /// Name of the key column in . + /// Name of the value column in . + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// A instance of the ValueMappingEstimator + /// + /// + /// + /// + public static ValueMappingEstimator MapValue( + this TransformsCatalog.ConversionTransforms catalog, + IDataView lookupMap, string keyColumnName, string valueColumnName, string outputColumnName, string inputColumnName = null) + => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumnName, valueColumnName, + new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + /// /// /// diff --git a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs index d3cb5c0abc..28cc8b676f 100644 --- a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs @@ -21,12 +21,12 @@ internal sealed class ColumnOptions /// /// Specifies input and output column names for a transformation. /// - /// Name of output column resulting from the transformation of . - /// Name of input column. - public ColumnOptions(string outputColumnName, string inputColumnName) + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + public ColumnOptions(string outputColumnName, string inputColumnName = null) { _outputColumnName = outputColumnName; - _inputColumnName = inputColumnName; + _inputColumnName = inputColumnName ?? 
outputColumnName; } /// diff --git a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs index 222367ef9e..3b8dad98e8 100644 --- a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs @@ -9,6 +9,19 @@ namespace Microsoft.ML { public static class ImageEstimatorsCatalog { + /// + /// The transform's catalog. + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// + /// + /// + /// + public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null) + => new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + /// /// The transform's catalog. /// Specifies the names of the input columns for the transformation, and their respective output column names. @@ -22,6 +35,30 @@ public static class ImageEstimatorsCatalog internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params ColumnOptions[] columns) => new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); + /// + /// Loads the images from the into memory. + /// + /// + /// The image get loaded in memory as a type. + /// Loading is the first step of almost every pipeline that does image processing, and further analysis on images. + /// The images to load need to be in the formats supported by . + /// For end-to-end image processing pipelines, and scenarios in your applications, see the + /// examples in the machinelearning-samples github repository. + /// + /// + /// The transform's catalog. + /// The images folder. + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. 
If set to , the value of the will be used as source. + /// + /// + /// + /// + public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, string outputColumnName, string inputColumnName = null) + => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + /// /// Loads the images from the into memory. /// diff --git a/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs b/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs index 85f6b22c28..0609f1cf67 100644 --- a/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs +++ b/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs @@ -168,6 +168,5 @@ public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog catal [BestFriend] internal static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog catalog, params VectorWhiteningEstimator.ColumnOptions[] columns) => new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), columns); - } } diff --git a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs index 1c353657f7..ab38237645 100644 --- a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs +++ b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs @@ -14,17 +14,26 @@ namespace Microsoft.ML public static class CategoricalCatalog { /// - /// Convert a text column into one-hot encoded vector. + /// Convert several text column into one-hot encoded vectors. /// /// The transform catalog /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. - /// The conversion mode. + /// Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector. + /// Maximum number of terms to keep per column when auto-training. + /// How items should be ordered when vectorized. 
If chosen they will be in the order encountered. + /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). + /// Specifies an ordering for the encoding. If specified, this should be a single column data view, + /// and the key-values will be taken from that column. If unspecified, the ordering will be determined from the input data upon fitting. public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, string outputColumnName, string inputColumnName = null, - OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator) - => new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, outputKind); + OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.Defaults.OutKind, + int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys, + ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality, + IDataView keyData = null) + => new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), + new[] { new OneHotEncodingEstimator.ColumnOptions(outputColumnName, inputColumnName, outputKind, maximumNumberOfKeys, keyOrdinality) }, keyData); /// /// Convert several text column into one-hot encoded vectors. @@ -56,6 +65,8 @@ internal static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Ca /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Hashing seed. + /// Whether the position of each term should be included in the hash. /// During hashing we constuct mappings between original values and the produced hash values.
/// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -64,10 +75,13 @@ internal static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Ca public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCatalog.CategoricalTransforms catalog, string outputColumnName, string inputColumnName = null, + OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator, int numberOfBits = OneHotHashEncodingEstimator.Defaults.NumberOfBits, - int maximumNumberOfInverts = OneHotHashEncodingEstimator.Defaults.MaximumNumberOfInverts, - OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator) - => new OneHotHashEncodingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName ?? outputColumnName, numberOfBits, maximumNumberOfInverts, outputKind); + uint seed = OneHotHashEncodingEstimator.Defaults.Seed, + bool useOrderedHashing = OneHotHashEncodingEstimator.Defaults.UseOrderedHashing, + int maximumNumberOfInverts = OneHotHashEncodingEstimator.Defaults.MaximumNumberOfInverts) + => new OneHotHashEncodingEstimator(CatalogUtils.GetEnvironment(catalog), + new[] { new OneHotHashEncodingEstimator.ColumnOptions(outputColumnName, inputColumnName, outputKind, numberOfBits, seed, useOrderedHashing, maximumNumberOfInverts) }); /// /// Convert several text column into hash-based one-hot encoded vectors. 
diff --git a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs index 2694d4cb8a..a6afea9eac 100644 --- a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs @@ -40,22 +40,26 @@ public static MissingValueIndicatorEstimator IndicateMissingValues(this Transfor /// (depending on whether the is given a value, or left to null) /// identical to the input column for everything but the missing values. The missing values of the input column, in this new column are replaced with /// one of the values specifid in the . The default for the is - /// . + /// . /// /// The transform extensions' catalog. /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. /// If not provided, the will be replaced with the results of the transforms. - /// The type of replacement to use as specified in + /// The type of replacement to use as specified in + /// If true, per-slot imputation of replacement is performed. + /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, + /// where imputation is always for the entire column. 
public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, - MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) - => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, replacementMode); + MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode, + bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) + => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { new MissingValueReplacingEstimator.ColumnOptions(outputColumnName, inputColumnName, replacementMode, imputeBySlot) }); /// /// Creates a new output column, identical to the input column for everything but the missing values. - /// The missing values of the input column, in this new column are replaced with . + /// The missing values of the input column, in this new column are replaced with . /// /// The transform extensions' catalog. /// The name of the columns to use, and per-column transformation configuraiton. diff --git a/src/Microsoft.ML.Transforms/KernelCatalog.cs b/src/Microsoft.ML.Transforms/KernelCatalog.cs index 8328476958..a393c4cccd 100644 --- a/src/Microsoft.ML.Transforms/KernelCatalog.cs +++ b/src/Microsoft.ML.Transforms/KernelCatalog.cs @@ -21,6 +21,8 @@ public static class KernelExpansionCatalog /// The number of random Fourier features to create. /// If , use both of cos and sin basis functions to create two features for every random Fourier frequency. /// Otherwise, only cos bases would be used. + /// Which fourier generator to use. + /// The seed of the random number generator for generating the new features (if unspecified, the global random is used). 
/// /// /// new ApproximatedKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, rank, useCosAndSinBases); /// diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 64e500d6c5..9e66924b3e 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -51,7 +51,8 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// /// The transform catalog /// The normalization settings for all the columns - public static NormalizingEstimator Normalize(this TransformsCatalog catalog, + [BestFriend] + internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, params NormalizingEstimator.ColumnOptionsBase[] columns) => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 240bbcea28..4aa28da763 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -119,8 +119,8 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T /// /// The text-related transform's catalog. - /// Name of the column resulting from the transformation of . /// The path of the pre-trained embeedings model to use. + /// Name of the column resulting from the transformation of . /// Name of the column to transform. /// /// @@ -388,6 +388,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. 
+ /// Whether to rehash unigrams. public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, @@ -397,10 +398,47 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T bool useAllLengths = NgramHashingEstimator.Defaults.UseAllLengths, uint seed = NgramHashingEstimator.Defaults.Seed, bool useOrderedHashing = NgramHashingEstimator.Defaults.UseOrderedHashing, - int maximumNumberOfInverts = NgramHashingEstimator.Defaults.MaximumNumberOfInverts) + int maximumNumberOfInverts = NgramHashingEstimator.Defaults.MaximumNumberOfInverts, + bool rehashUnigrams = NgramHashingEstimator.Defaults.RehashUnigrams) + => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), + new[] {new NgramHashingEstimator.ColumnOptions(outputColumnName, new[] { inputColumnName }, ngramLength: ngramLength, skipLength: skipLength, + useAllLengths: useAllLengths, numberOfBits: numberOfBits, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts, rehashUnigrams) }); + + /// + /// Produces a bag of counts of hashed ngrams in + /// and outputs ngram vector as + /// + /// is different from in a way that + /// takes tokenized text as input while tokenizes text internally. + /// + /// The text-related transform's catalog. + /// Name of the column resulting from the transformation of . + /// Names of the columns to transform. If set to , the value of the will be used as source. + /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Ngram length. + /// Maximum number of tokens to skip when constructing an ngram. + /// Whether to include all ngram lengths up to or only . + /// Hashing seed. + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). 
+ /// During hashing we construct mappings between original values and the produced hash values. + /// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one. + /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. + /// 0 does not retain any input values. -1 retains all input values mapping to each hash. + /// Whether to rehash unigrams. + public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, + string outputColumnName, + string[] inputColumnNames = null, + int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits, + int ngramLength = NgramHashingEstimator.Defaults.NgramLength, + int skipLength = NgramHashingEstimator.Defaults.SkipLength, + bool useAllLengths = NgramHashingEstimator.Defaults.UseAllLengths, + uint seed = NgramHashingEstimator.Defaults.Seed, + bool useOrderedHashing = NgramHashingEstimator.Defaults.UseOrderedHashing, + int maximumNumberOfInverts = NgramHashingEstimator.Defaults.MaximumNumberOfInverts, + bool rehashUnigrams = NgramHashingEstimator.Defaults.RehashUnigrams) => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnName, numberOfBits: numberOfBits, ngramLength: ngramLength, skipLength: skipLength, - useAllLengths: useAllLengths, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts); + new[] {new NgramHashingEstimator.ColumnOptions(outputColumnName, inputColumnNames, ngramLength: ngramLength, skipLength: skipLength, + useAllLengths: useAllLengths, numberOfBits: numberOfBits, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts, rehashUnigrams) }); /// /// Produces a bag of counts of hashed ngrams for each .
For each column, @@ -424,9 +462,16 @@ internal static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. /// The number of topics. + /// Dirichlet prior on document-topic vectors. + /// Dirichlet prior on vocab-topic vectors. + /// Number of Metropolis Hasting step. /// Number of iterations. + /// Compute log likelihood over local dataset on this iteration interval. + /// The number of training threads. Default value depends on number of logical processors. /// The threshold of maximum count of tokens per doc. /// The number of words to summarize the topic. + /// The number of burn-in iterations. + /// Reset the random number generator for each document. /// /// /// new LatentDirichletAllocationEstimator(CatalogUtils.GetEnvironment(catalog), - outputColumnName, inputColumnName, numberOfTopics, - LatentDirichletAllocationEstimator.Defaults.AlphaSum, - LatentDirichletAllocationEstimator.Defaults.Beta, - LatentDirichletAllocationEstimator.Defaults.SamplingStepCount, - maximumNumberOfIterations, - LatentDirichletAllocationEstimator.Defaults.NumberOfThreads, - maximumTokenCountPerDocument, - numberOfSummaryTermsPerTopic, - LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval, - LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations, - LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator); + outputColumnName, inputColumnName, numberOfTopics, alphaSum, beta, samplingStepCount, + maximumNumberOfIterations, numberOfThreads, maximumTokenCountPerDocument, numberOfSummaryTermsPerTopic, + likelihoodInterval, numberOfBurninIterations, resetRandomGenerator); /// /// Uses LightLDA to transform a document (represented as a vector of floats) From 2e276340eaae09e496d9bff75ec73d3a0feef5cf Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Thu, 14 Mar 2019 14:28:24 -0700 Subject: 
[PATCH 4/8] updated samples or moved to test folder to work with changes --- docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs | 3 ++- .../Dynamic/TensorFlow/TextClassification.cs | 4 ++-- .../Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs | 4 ++-- .../Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs | 2 +- .../Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs | 2 +- .../Dynamic/Transforms/ImageAnalytics/LoadImages.cs | 2 +- .../Dynamic/Transforms/ImageAnalytics/ResizeImages.cs | 2 +- docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs | 2 +- .../Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs | 2 +- .../Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs | 2 +- .../Dynamic/ValueMappingStringToKeyType.cs | 4 ++-- .../VectorWhitenWithColumnOptions.cs | 0 12 files changed, 15 insertions(+), 14 deletions(-) rename {docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection => test/Microsoft.ML.Tests/SamplesWithColumnOptions}/VectorWhitenWithColumnOptions.cs (100%) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs index c5421ac305..55f3c89845 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs @@ -58,7 +58,8 @@ public static void Example() // Composing a different pipeline if we wanted to normalize more than one column at a time. // Using log scale as the normalization mode. - var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizationMode.LogMeanVariance, new ColumnOptions[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") }); + var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance) + .Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance)); // The transformed data. 
var multiColtransformer = multiColPipeline.Fit(trainData); var multiColtransformedData = multiColtransformer.Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs index b2b5363a8d..1ced6d2d7a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -69,10 +69,10 @@ public static void Example() }; var engine = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text") - .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", new ColumnOptions[] { ("VariableLenghtFeatures", "TokenizedWords") })) + .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", "VariableLenghtFeatures", "TokenizedWords")) .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize")) .Append(tensorFlowModel.ScoreTensorFlowModel(new[] { "Prediction/Softmax" }, new[] { "Features" })) - .Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax"))) + .Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax")) .Fit(dataView) .CreatePredictionEngine(mlContext); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs index 1a1dcb7a54..d5cba7120b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs @@ -36,8 +36,8 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. 
- var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageObject", "ImagePath")) - .Append(mlContext.Transforms.ConvertToGrayscale(("Grayscale", "ImageObject"))); + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageObject", "ImagePath") + .Append(mlContext.Transforms.ConvertToGrayscale("Grayscale", "ImageObject")); var transformedData = pipeline.Fit(data).Transform(data); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs index 7f3e5d3c62..af69a3578c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs @@ -40,7 +40,7 @@ public static void Example() // Installing the Microsoft.ML.DNNImageFeaturizer packages copies the models in the // `DnnImageModels` folder. // Image loading pipeline. 
- var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageObject", "ImagePath")) + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageObject", "ImagePath") .Append(mlContext.Transforms.ResizeImages("ImageObject", imageWidth: 224, imageHeight: 224)) .Append(mlContext.Transforms.ExtractPixels("Pixels", "ImageObject")) .Append(mlContext.Transforms.DnnFeaturizeImage("FeaturizedImage", m => m.ModelSelector.ResNet18(mlContext, m.OutputColumn, m.InputColumn), "Pixels")); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs index da6c583e13..188e36ca15 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs @@ -37,7 +37,7 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. - var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageObject", "ImagePath")) + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageObject", "ImagePath") .Append(mlContext.Transforms.ResizeImages("ImageObject", imageWidth: 100, imageHeight: 100 )) .Append(mlContext.Transforms.ExtractPixels("Pixels", "ImageObject")); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs index 80404e3ae7..f6fb4cae29 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs @@ -36,7 +36,7 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. 
- var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageReal", "ImagePath")); + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageReal", "ImagePath"); var transformedData = pipeline.Fit(data).Transform(data); // The transformedData IDataView contains the loaded images now diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs index b792aa9a8e..ca0d642e14 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs @@ -36,7 +36,7 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. - var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageReal", "ImagePath")) + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageReal", "ImagePath") .Append(mlContext.Transforms.ResizeImages("ImageReal", imageWidth: 100, imageHeight: 100)); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs index 32e4106ba5..08636f412a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs @@ -55,7 +55,7 @@ public static void Example() }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, ("EducationCategory", "Education")); + var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, "EducationCategory", "Education"); // Fits the ValueMappingEstimator and transforms the data converting the Education to EducationCategory. 
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs index db4a944b37..c2e8fd1fca 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs @@ -46,7 +46,7 @@ public static void Example() }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue(temperatureKeys, classificationValues, ("TemperatureCategory", "Temperature")); + var pipeline = mlContext.Transforms.Conversion.MapValue(temperatureKeys, classificationValues, "TemperatureCategory", "Temperature"); // Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs index 8fafc547c5..b0fc8f68b4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs @@ -49,7 +49,7 @@ public static void Example() }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, ("EducationFeature", "Education")); + var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, "EducationFeature", "Education"); // Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column. 
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs index 260f17a720..42902697c8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs @@ -55,8 +55,8 @@ public static void Example() // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back // to the original value. - var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, true, ("EducationKeyType", "Education")) - .Append(mlContext.Transforms.Conversion.MapKeyToValue(("EducationCategory", "EducationKeyType"))); + var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, "EducationKeyType", "Education", true) + .Append(mlContext.Transforms.Conversion.MapKeyToValue("EducationCategory", "EducationKeyType")); // Fits the ValueMappingEstimator and transforms the data adding the EducationKeyType column. 
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs b/test/Microsoft.ML.Tests/SamplesWithColumnOptions/VectorWhitenWithColumnOptions.cs similarity index 100% rename from docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs rename to test/Microsoft.ML.Tests/SamplesWithColumnOptions/VectorWhitenWithColumnOptions.cs From 05909ad0733d650cc903b306ffa8fc0e14b1f610 Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Thu, 14 Mar 2019 14:29:31 -0700 Subject: [PATCH 5/8] other miscellaneous changes to make it compile --- .../Transforms/TypeConverting.cs | 2 +- .../Transforms/ValueToKeyMappingEstimator.cs | 1 + .../TransformsStatic.cs | 16 +++--- .../MissingValueHandlingTransformer.cs | 4 +- .../MissingValueReplacing.cs | 56 +++++++++---------- .../Properties/AssemblyInfo.cs | 1 + .../Debugging.cs | 2 - .../Transformers/CategoricalTests.cs | 8 +-- .../Transformers/NAReplaceTests.cs | 24 ++++---- 9 files changed, 57 insertions(+), 57 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs index 0127b65860..1538d530e1 100644 --- a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs +++ b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs @@ -173,7 +173,7 @@ private static VersionInfo GetVersionInfo() /// /// A collection of describing the settings of the transformation. 
/// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); + internal IReadOnlyCollection Columns => _columns.AsReadOnly(); private readonly TypeConvertingEstimator.ColumnOptions[] _columns; diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs index 2ba93bf56e..e3ae52f98a 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs @@ -18,6 +18,7 @@ internal static class Defaults { public const int MaximumNumberOfKeys = 1000000; public const KeyOrdinality Ordinality = KeyOrdinality.ByOccurrence; + public const bool AddKeyValueAnnotationsAsText = false; } /// diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs index 607a3439fa..b2f9596cd0 100644 --- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs @@ -731,9 +731,9 @@ public static class NAReplacerStaticExtensions private readonly struct Config { public readonly bool ImputeBySlot; - public readonly MissingValueReplacingEstimator.ColumnOptions.ReplacementMode ReplacementMode; + public readonly MissingValueReplacingEstimator.ReplacementMode ReplacementMode; - public Config(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, + public Config(MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) { ImputeBySlot = imputeBySlot; @@ -814,7 +814,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// Incoming data. 
/// How NaN should be replaced - public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode) { Contracts.CheckValue(input, nameof(input)); return new OutScalar(input, new Config(replacementMode, false)); @@ -825,7 +825,7 @@ public static Scalar ReplaceNaNValues(this Scalar input, MissingVa /// /// Incoming data. /// How NaN should be replaced - public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode) { Contracts.CheckValue(input, nameof(input)); return new OutScalar(input, new Config(replacementMode, false)); @@ -838,7 +838,7 @@ public static Scalar ReplaceNaNValues(this Scalar input, Missing /// If true, per-slot imputation of replacement is performed. /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, /// where imputation is always for the entire column. 
- public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) + public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) { Contracts.CheckValue(input, nameof(input)); return new OutVectorColumn(input, new Config(replacementMode, imputeBySlot)); @@ -852,7 +852,7 @@ public static Vector ReplaceNaNValues(this Vector input, MissingVa /// If true, per-slot imputation of replacement is performed. /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, /// where imputation is always for the entire column. - public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) + public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) { Contracts.CheckValue(input, nameof(input)); return new OutVectorColumn(input, new Config(replacementMode, imputeBySlot)); @@ -863,7 +863,7 @@ public static Vector ReplaceNaNValues(this Vector input, Missing /// /// Incoming data. 
/// How NaN should be replaced - public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode) { Contracts.CheckValue(input, nameof(input)); return new OutVarVectorColumn(input, new Config(replacementMode, false)); @@ -873,7 +873,7 @@ public static VarVector ReplaceNaNValues(this VarVector input, Mis /// /// Incoming data. /// How NaN should be replaced - public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode) { Contracts.CheckValue(input, nameof(input)); return new OutVarVectorColumn(input, new Config(replacementMode, false)); diff --git a/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs index 83f1dd9ac8..1dd8390c36 100644 --- a/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs @@ -154,7 +154,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa if (!addInd) { replaceCols.Add(new MissingValueReplacingEstimator.ColumnOptions(column.Name, column.Source, - (MissingValueReplacingEstimator.ColumnOptions.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); + (MissingValueReplacingEstimator.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? 
options.ImputeBySlot)); continue; } @@ -189,7 +189,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa // Add the NAReplaceTransform column. replaceCols.Add(new MissingValueReplacingEstimator.ColumnOptions(tmpReplacementColName, column.Source, - (MissingValueReplacingEstimator.ColumnOptions.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); + (MissingValueReplacingEstimator.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); // Add the ConcatTransform column. if (replaceType is VectorType) diff --git a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs index 3b5c6710c2..e6b83c56d3 100644 --- a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs +++ b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs @@ -118,7 +118,7 @@ internal sealed class Options : TransformInputBase public Column[] Columns; [Argument(ArgumentType.AtMostOnce, HelpText = "The replacement method to utilize", ShortName = "kind")] - public ReplacementKind ReplacementKind = (ReplacementKind)MissingValueReplacingEstimator.Defaults.ReplacementMode; + public ReplacementKind ReplacementKind = (ReplacementKind)MissingValueReplacingEstimator.Defaults.Mode; // Specifying by-slot imputation for vectors of unknown size will cause a warning, and the imputation will be global. [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to impute values by slot", ShortName = "slot")] @@ -442,7 +442,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa cols[i] = new MissingValueReplacingEstimator.ColumnOptions( item.Name, item.Source, - (MissingValueReplacingEstimator.ColumnOptions.ReplacementMode)(item.Kind ?? options.ReplacementKind), + (MissingValueReplacingEstimator.ReplacementMode)(item.Kind ?? options.ReplacementKind), item.Slot ?? 
options.ImputeBySlot, item.ReplacementString); }; @@ -891,10 +891,33 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src public sealed class MissingValueReplacingEstimator : IEstimator { + /// + /// The possible ways to replace missing values. + /// + public enum ReplacementMode : byte + { + /// + /// Replace with the default value of the column based on its type. For example, 'zero' for numeric and 'empty' for string/text columns. + /// + DefaultValue = 0, + /// + /// Replace with the mean value of the column. Supports only numeric/time span/ DateTime columns. + /// + Mean = 1, + /// + /// Replace with the minimum value of the column. Supports only numeric/time span/ DateTime columns. + /// + Minimum = 2, + /// + /// Replace with the maximum value of the column. Supports only numeric/time span/ DateTime columns. + /// + Maximum = 3, + } + [BestFriend] internal static class Defaults { - public const ColumnOptions.ReplacementMode ReplacementMode = ColumnOptions.ReplacementMode.DefaultValue; + public const ReplacementMode Mode = ReplacementMode.DefaultValue; public const bool ImputeBySlot = true; } @@ -904,29 +927,6 @@ internal static class Defaults [BestFriend] internal sealed class ColumnOptions { - /// - /// The possible ways to replace missing values. - /// - public enum ReplacementMode : byte - { - /// - /// Replace with the default value of the column based on its type. For example, 'zero' for numeric and 'empty' for string/text columns. - /// - DefaultValue = 0, - /// - /// Replace with the mean value of the column. Supports only numeric/time span/ DateTime columns. - /// - Mean = 1, - /// - /// Replace with the minimum value of the column. Supports only numeric/time span/ DateTime columns. - /// - Minimum = 2, - /// - /// Replace with the maximum value of the column. Supports only numeric/time span/ DateTime columns. - /// - Maximum = 3, - } - /// Name of the column resulting from the transformation of . 
public readonly string Name; /// Name of column to transform. @@ -951,7 +951,7 @@ public enum ReplacementMode : byte /// If true, per-slot imputation of replacement is performed. /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, /// where imputation is always for the entire column. - public ColumnOptions(string name, string inputColumnName = null, ReplacementMode replacementMode = Defaults.ReplacementMode, + public ColumnOptions(string name, string inputColumnName = null, ReplacementMode replacementMode = Defaults.Mode, bool imputeBySlot = Defaults.ImputeBySlot) { Contracts.CheckNonWhiteSpace(name, nameof(name)); @@ -975,7 +975,7 @@ internal ColumnOptions(string name, string inputColumnName, ReplacementMode repl private readonly IHost _host; private readonly ColumnOptions[] _columns; - internal MissingValueReplacingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, ColumnOptions.ReplacementMode replacementKind = Defaults.ReplacementMode) + internal MissingValueReplacingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, ReplacementMode replacementKind = Defaults.Mode) : this(env, new ColumnOptions(outputColumnName, inputColumnName ?? 
outputColumnName, replacementKind)) { diff --git a/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs index b9317da64c..3ef4273ed9 100644 --- a/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs @@ -17,5 +17,6 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring.TextAnalytics" + InternalPublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TimeSeries" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TestFramework" + PublicKey.TestValue)] [assembly: WantsToBeBestFriends] diff --git a/test/Microsoft.ML.Functional.Tests/Debugging.cs b/test/Microsoft.ML.Functional.Tests/Debugging.cs index a495c99c99..9253e73ebe 100644 --- a/test/Microsoft.ML.Functional.Tests/Debugging.cs +++ b/test/Microsoft.ML.Functional.Tests/Debugging.cs @@ -105,7 +105,6 @@ public void InspectPipelineSchema() // Define a pipeline var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize()) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca( new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 20 })); @@ -173,7 +172,6 @@ public void ViewTrainingOutput() // Define a pipeline var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize()) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca( new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 20 })); diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs index f40df33a64..77eb4da72f 100644 --- 
a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs @@ -76,10 +76,10 @@ public void CategoricalOneHotHashEncoding() var mlContext = new MLContext(); var dataView = mlContext.Data.LoadFromEnumerable(data); - var pipe = mlContext.Transforms.Categorical.OneHotHashEncoding("CatA", "A", 3, 0, OneHotEncodingEstimator.OutputKind.Bag) - .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatB", "A", 2, 0, OneHotEncodingEstimator.OutputKind.Key)) - .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatC", "A", 3, 0, OneHotEncodingEstimator.OutputKind.Indicator)) - .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatD", "A", 2, 0, OneHotEncodingEstimator.OutputKind.Binary)); + var pipe = mlContext.Transforms.Categorical.OneHotHashEncoding("CatA", "A", OneHotEncodingEstimator.OutputKind.Bag, 3, 0) + .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatB", "A", OneHotEncodingEstimator.OutputKind.Key, 2, 0)) + .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatC", "A", OneHotEncodingEstimator.OutputKind.Indicator, 3, 0)) + .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatD", "A", OneHotEncodingEstimator.OutputKind.Binary, 2, 0)); TestEstimatorCore(pipe, dataView); Done(); diff --git a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs index ffe79ad181..fc55126dc6 100644 --- a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs @@ -43,10 +43,10 @@ public void NAReplaceWorkout() var dataView = ML.Data.LoadFromEnumerable(data); var pipe = ML.Transforms.ReplaceMissingValues( - new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", 
MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean)); + new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ReplacementMode.Mean)); TestEstimatorCore(pipe, dataView); Done(); } @@ -68,10 +68,10 @@ public void NAReplaceStatic() var est = data.MakeNewEstimator(). Append(row => ( - A: row.ScalarFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Maximum), - B: row.ScalarDouble.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - C: row.VectorFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - D: row.VectorDoulbe.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Minimum) + A: row.ScalarFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ReplacementMode.Maximum), + B: row.ScalarDouble.ReplaceNaNValues(MissingValueReplacingEstimator.ReplacementMode.Mean), + C: row.VectorFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ReplacementMode.Mean), + D: row.VectorDoulbe.ReplaceNaNValues(MissingValueReplacingEstimator.ReplacementMode.Minimum) )); TestEstimatorCore(est.AsDynamic, data.AsDynamic, invalidInput: invalidData); @@ -104,10 +104,10 @@ public void TestOldSavingAndLoading() var dataView = ML.Data.LoadFromEnumerable(data); var pipe = ML.Transforms.ReplaceMissingValues( - new 
MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean)); + new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ReplacementMode.Mean)); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); From b24f9f5c0bd20e1c87dc78bb4cac3683e9c05335 Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Mon, 18 Mar 2019 17:01:50 -0700 Subject: [PATCH 6/8] review comments --- .../Dynamic/TensorFlow/TextClassification.cs | 2 +- .../Projection/VectorWhitenWithOptions.cs | 5 +- .../Dynamic/ValueMapping.cs | 22 ++--- .../Dynamic/ValueMappingFloatToString.cs | 22 ++--- .../Dynamic/ValueMappingStringToArray.cs | 20 ++-- .../Dynamic/ValueMappingStringToKeyType.cs | 20 ++-- .../ConversionsExtensionsCatalog.cs | 92 ++++++++++--------- .../ExtensionsCatalog.cs | 4 +- .../MklComponentsCatalog.cs | 3 +- src/Microsoft.ML.Transforms/KernelCatalog.cs | 3 +- .../Transformers/ValueMappingTests.cs | 32 ++++--- 11 files changed, 105 insertions(+), 120 deletions(-) rename test/Microsoft.ML.Tests/SamplesWithColumnOptions/VectorWhitenWithColumnOptions.cs => 
docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs (90%) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs index 1ced6d2d7a..b503b3a797 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -69,7 +69,7 @@ public static void Example() }; var engine = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text") - .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", "VariableLenghtFeatures", "TokenizedWords")) + .Append(mlContext.Transforms.Conversion.MapValue("VariableLenghtFeatures", lookupMap, "Words", "Ids", "TokenizedWords")) .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize")) .Append(tensorFlowModel.ScoreTensorFlowModel(new[] { "Prediction/Softmax" }, new[] { "Features" })) .Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax")) diff --git a/test/Microsoft.ML.Tests/SamplesWithColumnOptions/VectorWhitenWithColumnOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs similarity index 90% rename from test/Microsoft.ML.Tests/SamplesWithColumnOptions/VectorWhitenWithColumnOptions.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs index eaec189bc4..bf314064e1 100644 --- a/test/Microsoft.ML.Tests/SamplesWithColumnOptions/VectorWhitenWithColumnOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.ML.Samples.Dynamic { - public sealed class VectorWhitenWithColumnOptions + public sealed class VectorWhitenWithOptions { /// This example requires installation of additional nuget package Microsoft.ML.Mkl.Components. 
public static void Example() @@ -39,8 +39,7 @@ public static void Example() // A pipeline to project Features column into white noise vector. - var whiteningPipeline = ml.Transforms.VectorWhiten(new Transforms.VectorWhiteningEstimator.ColumnOptions( - nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.WhiteningKind.PrincipalComponentAnalysis, rank: 4)); + var whiteningPipeline = ml.Transforms.VectorWhiten(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.WhiteningKind.PrincipalComponentAnalysis, rank: 4); // The transformed (projected) data. var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs index 08636f412a..030cc43e42 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs @@ -38,24 +38,16 @@ public static void Example() // 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ... // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of keys based on the Education values from the dataset. - var educationKeys = new List() + // Creating a list of key-value pairs based on the Education values from the dataset. 
+ var educationKeyValuePairs = new List>() { - "0-5yrs", - "6-11yrs", - "12+yrs" + new KeyValuePair("0-5yrs", "Undergraduate"), + new KeyValuePair("6-11yrs", "Postgraduate"), + new KeyValuePair("12+yrs", "Postgraduate") }; - - // Creating a list of associated values that will map respectively to each educationKey - var educationValues = new List() - { - "Undergraduate", - "Postgraduate", - "Postgraduate" - }; - + // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, "EducationCategory", "Education"); + var pipeline = mlContext.Transforms.Conversion.MapValue("EducationCategory", educationKeyValuePairs, "Education"); // Fits the ValueMappingEstimator and transforms the data converting the Education to EducationCategory. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs index c2e8fd1fca..c31a435c42 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs @@ -29,24 +29,16 @@ public static void Example() IDataView trainData = mlContext.Data.LoadFromEnumerable(data); // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of keys based on the induced value from the dataset - var temperatureKeys = new List() + // Creating a list of key-value pairs based on the induced value from the dataset + var temperatureKeyValuePairs = new List>() { - 36.0f, - 35.0f, - 34.0f + new KeyValuePair(36.0f, "T1"), + new KeyValuePair(35.0f, "T2"), + new KeyValuePair(34.0f, "T3") }; - - // Creating a list of values, these strings will map accordingly to each key. 
- var classificationValues = new List() - { - "T1", - "T2", - "T3" - }; - + // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue(temperatureKeys, classificationValues, "TemperatureCategory", "Temperature"); + var pipeline = mlContext.Transforms.Conversion.MapValue("TemperatureCategory", temperatureKeyValuePairs, "Temperature"); // Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs index b0fc8f68b4..0fd6d2fddd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs @@ -32,24 +32,16 @@ public static void Example() IDataView trainData = mlContext.Data.LoadFromEnumerable(data); // If the list of keys and values are known, they can be passed to the API. 
The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of keys based on the Education values from the dataset - var educationKeys = new List() + // Creating a list of key-value pairs based on the Education values from the dataset + var educationKeyValuePairs = new List>() { - "0-5yrs", - "6-11yrs", - "12+yrs" - }; - - // Sample list of associated array values - var educationValues = new List() - { - new int[] { 1,2,3 }, - new int[] { 5,6,7 }, - new int[] { 42,32,64 } + new KeyValuePair("0-5yrs", new int[] { 1,2,3 }), + new KeyValuePair("6-11yrs", new int[] { 1,2,3 }), + new KeyValuePair("12+yrs", new int[] { 1,2,3 }) }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, "EducationFeature", "Education"); + var pipeline = mlContext.Transforms.Conversion.MapValue("EducationFeature", educationKeyValuePairs, "Education"); // Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs index 42902697c8..2d8ab74e12 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs @@ -35,27 +35,19 @@ public static void Example() IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); IDataView trainData = mlContext.Data.LoadFromEnumerable(data); - // Creating a list of keys based on the Education values from the dataset + // Creating a list of key-value pairs based on the Education values from the dataset // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. 
- var educationKeys = new List() + var educationKeyValuePairs = new List>() { - "0-5yrs", - "6-11yrs", - "12+yrs" - }; - - // Creating a list of values that are sample strings. These will be converted to KeyTypes - var educationValues = new List() - { - "Undergraduate", - "Postgraduate", - "Postgraduate" + new KeyValuePair("0-5yrs", "Undergraduate"), + new KeyValuePair("6-11yrs", "Postgraduate"), + new KeyValuePair("12+yrs", "Postgraduate") }; // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back // to the original value. - var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, "EducationKeyType", "Education", true) + var pipeline = mlContext.Transforms.Conversion.MapValue("EducationKeyType", educationKeyValuePairs, "Education", true) .Append(mlContext.Transforms.Conversion.MapKeyToValue("EducationCategory", "EducationKeyType")); // Fits the ValueMappingEstimator and transforms the data adding the EducationKeyType column. diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 9322c503c6..89f81cc4b4 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Collections.Generic; +using System.Linq; using Microsoft.Data.DataView; using Microsoft.ML.Data; using Microsoft.ML.Transforms; @@ -48,7 +49,6 @@ internal static HashingEstimator Hash(this TransformsCatalog.ConversionTransform /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. /// The expected kind of the output column. 
- /// New key count, if we work with key type. /// /// /// /// public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null, - DataKind outputKind = ConvertDefaults.DefaultOutputKind, KeyCount outputKeyCount = null) - => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { new TypeConvertingEstimator.ColumnOptions(outputColumnName, outputKind, inputColumnName, outputKeyCount) }); + DataKind outputKind = ConvertDefaults.DefaultOutputKind) + => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { new TypeConvertingEstimator.ColumnOptions(outputColumnName, outputKind, inputColumnName) }); /// /// Changes column type of the input column. @@ -149,7 +149,7 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText, IDataView keyData = null) => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), - new[] { new ValueToKeyMappingEstimator.ColumnOptions(outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality) }, keyData); + new[] { new ValueToKeyMappingEstimator.ColumnOptions(outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText) }, keyData); /// /// Converts value types into , optionally loading the keys to use from . @@ -177,10 +177,8 @@ internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog. /// The key type. /// The value type. /// The conversion transform's catalog - /// The list of keys to use for the mapping. The mapping is 1-1 with . The length of this list must be the same length as and - /// cannot contain duplicate keys. - /// The list of values to pair with the keys for the mapping. The length of this list must be equal to the same length as . /// Name of the column resulting from the transformation of . 
+ /// Specifies the mapping that will be perfomed. The keys will be mapped to the values as specified in the . /// Name of the column to transform. If set to , the value of the will be used as source. /// Whether to treat the values as a . /// An instance of the @@ -195,12 +193,16 @@ internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog. /// public static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, - IEnumerable keys, - IEnumerable values, string outputColumnName, + IEnumerable> keyValuePairs, string inputColumnName = null, bool treatValuesAsKeyType = false) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, + new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + } /// /// @@ -208,9 +210,7 @@ public static ValueMappingEstimator MapValueThe key type. /// The value type. /// The conversion transform's catalog - /// The list of keys to use for the mapping. The mapping is 1-1 with . The length of this list must be the same length as and - /// cannot contain duplicate keys. - /// The list of values to pair with the keys for the mapping. The length of this list must be equal to the same length as . + /// Specifies the mapping that will be perfomed. The keys will be mapped to the values as specified in the . /// The columns to apply this transform on. 
/// An instance of the /// @@ -225,10 +225,13 @@ public static ValueMappingEstimator MapValue MapValue( this TransformsCatalog.ConversionTransforms catalog, - IEnumerable keys, - IEnumerable values, + IEnumerable> keyValuePairs, params ColumnOptions[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, ColumnOptions.ConvertToValueTuples(columns)); + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, ColumnOptions.ConvertToValueTuples(columns)); + } /// /// @@ -236,9 +239,7 @@ internal static ValueMappingEstimator MapValueThe key type. /// The value type. /// The conversion transform's catalog - /// The list of keys to use for the mapping. The mapping is 1-1 with . The length of this list must be the same length as and - /// cannot contain duplicate keys. - /// The list of values to pair with the keys for the mapping. The length of this list must be equal to the same length as . + /// Specifies the mapping that will be perfomed. The keys will be mapped to the values as specified in the . /// Whether to treat the values as a . /// The columns to apply this transform on. 
/// An instance of the @@ -251,12 +252,15 @@ internal static ValueMappingEstimator MapValue MapValue( this TransformsCatalog.ConversionTransforms catalog, - IEnumerable keys, - IEnumerable values, + IEnumerable> keyValuePairs, bool treatValuesAsKeyType, params ColumnOptions[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, - ColumnOptions.ConvertToValueTuples(columns)); + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, + ColumnOptions.ConvertToValueTuples(columns)); + } /// /// @@ -264,10 +268,8 @@ internal static ValueMappingEstimator MapValueThe key type. /// The value type. /// The conversion transform's catalog - /// The list of keys to use for the mapping. The mapping is 1-1 with . The length of this list must be the same length as and - /// cannot contain duplicate keys. - /// The list of values to pair with the keys for the mapping of TOutputType[]. The length of this list must be equal to the same length as . /// Name of the column resulting from the transformation of . + /// Specifies the mapping that will be perfomed. The keys will be mapped to the values as specified in the . /// Name of the column to transform. If set to , the value of the will be used as source. 
/// An instance of the /// @@ -281,12 +283,15 @@ internal static ValueMappingEstimator MapValue public static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, - IEnumerable keys, - IEnumerable values, string outputColumnName, + IEnumerable> keyValuePairs, string inputColumnName = null) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + } /// /// @@ -294,9 +299,7 @@ public static ValueMappingEstimator MapValueThe key type. /// The value type. /// The conversion transform's catalog - /// The list of keys to use for the mapping. The mapping is 1-1 with . The length of this list must be the same length as and - /// cannot contain duplicate keys. - /// The list of values to pair with the keys for the mapping of TOutputType[]. The length of this list must be equal to the same length as . + /// Specifies the mapping that will be perfomed. The keys will be mapped to the values as specified in the . /// The columns to apply this transform on. 
/// An instance of the /// @@ -311,20 +314,23 @@ public static ValueMappingEstimator MapValue MapValue( this TransformsCatalog.ConversionTransforms catalog, - IEnumerable keys, - IEnumerable values, + IEnumerable> keyValuePairs, params ColumnOptions[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, - ColumnOptions.ConvertToValueTuples(columns)); + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, + ColumnOptions.ConvertToValueTuples(columns)); + } /// /// /// /// The conversion transform's catalog - /// An instance of that contains the key and value columns. - /// Name of the key column in . - /// Name of the value column in . /// Name of the column resulting from the transformation of . + /// An instance of that contains the key and value columns. + /// The key column in . + /// The value column in . /// Name of the column to transform. If set to , the value of the will be used as source. /// A instance of the ValueMappingEstimator /// @@ -338,9 +344,11 @@ internal static ValueMappingEstimator MapValue public static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, - IDataView lookupMap, string keyColumnName, string valueColumnName, string outputColumnName, string inputColumnName = null) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumnName, valueColumnName, - new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + string outputColumnName, IDataView lookupMap, string keyColumn, string valueColumn, string inputColumnName = null) + { + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn, valueColumn, + new[] { (outputColumnName, inputColumnName ?? 
outputColumnName) }); + } /// /// diff --git a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs index 3b8dad98e8..dfcac547fb 100644 --- a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs @@ -47,8 +47,8 @@ internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCata /// /// /// The transform's catalog. - /// The images folder. /// Name of the column resulting from the transformation of . + /// The images folder. /// Name of the column to transform. If set to , the value of the will be used as source. /// /// @@ -56,7 +56,7 @@ internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCata /// [!code-csharp[LoadImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs)] /// ]]> /// - public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, string outputColumnName, string inputColumnName = null) + public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string outputColumnName, string imageFolder, string inputColumnName = null) => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, new[] { (outputColumnName, inputColumnName ?? 
outputColumnName) }); /// diff --git a/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs b/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs index 0609f1cf67..10e144907b 100644 --- a/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs +++ b/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs @@ -142,6 +142,7 @@ public static SymbolicSgdTrainer SymbolicSgd( /// /// /// /// @@ -161,7 +162,7 @@ public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog catal /// /// /// /// /// diff --git a/src/Microsoft.ML.Transforms/KernelCatalog.cs b/src/Microsoft.ML.Transforms/KernelCatalog.cs index a393c4cccd..e399038833 100644 --- a/src/Microsoft.ML.Transforms/KernelCatalog.cs +++ b/src/Microsoft.ML.Transforms/KernelCatalog.cs @@ -37,7 +37,8 @@ public static ApproximatedKernelMappingEstimator ApproximatedKernelMap(this Tran bool useCosAndSinBases = ApproximatedKernelMappingEstimator.Defaults.UseCosAndSinBases, KernelBase generator = null, int? seed = null) - => new ApproximatedKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, rank, useCosAndSinBases); + => new ApproximatedKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), + new[] { new ApproximatedKernelMappingEstimator.ColumnOptions(outputColumnName, rank, useCosAndSinBases, inputColumnName, generator, seed) }); /// /// Takes columns filled with a vector of floats and maps its to a random low-dimensional feature space. 
diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index c98617c8a8..392f55271b 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -507,11 +507,15 @@ public void ValueMappingWorkout() var badData = new[] { new TestWrong() { A = "bar", B = 1.2f } }; var badDataView = ML.Data.LoadFromEnumerable(badData); - var keys = new List() { "foo", "bar", "test", "wahoo" }; - var values = new List() { 1, 2, 3, 4 }; + var keyValuePairs = new List>() { + new KeyValuePair("foo", 1), + new KeyValuePair("bar", 2), + new KeyValuePair("test", 3), + new KeyValuePair("wahoo", 4) + }; // Workout on value mapping - var est = ML.Transforms.Conversion.MapValue(keys, values, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); + var est = ML.Transforms.Conversion.MapValue(keyValuePairs, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } @@ -523,14 +527,14 @@ public void ValueMappingValueTypeIsVectorWorkout() var badData = new[] { new TestWrong() { A = "bar", B = 1.2f } }; var badDataView = ML.Data.LoadFromEnumerable(badData); - var keys = new List() { "foo", "bar", "test" }; - var values = new List() { - new int[] {2, 3, 4 }, - new int[] {100, 200 }, - new int[] {400, 500, 600, 700 }}; + var keyValuePairs = new List>() { + new KeyValuePair("foo", new int[] {2, 3, 4 }), + new KeyValuePair("bar", new int[] {100, 200 }), + new KeyValuePair("test", new int[] {400, 500, 600, 700 }), + }; // Workout on value mapping - var est = ML.Transforms.Conversion.MapValue(keys, values, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); + var est = ML.Transforms.Conversion.MapValue(keyValuePairs, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } @@ 
-543,11 +547,15 @@ public void ValueMappingInputIsVectorWorkout() var badData = new[] { new TestWrong() { B = 1.2f } }; var badDataView = ML.Data.LoadFromEnumerable(badData); - var keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; - var values = new List() { 1, 2, 3, 4 }; + var keyValuePairs = new List,int>>() { + new KeyValuePair,int>("foo".AsMemory(), 1), + new KeyValuePair,int>("bar".AsMemory(), 2), + new KeyValuePair,int>("test".AsMemory(), 3), + new KeyValuePair,int>("wahoo".AsMemory(), 4) + }; var est = ML.Transforms.Text.TokenizeIntoWords("TokenizeB", "B") - .Append(ML.Transforms.Conversion.MapValue(keys, values, new ColumnOptions[] { ("VecB", "TokenizeB") })); + .Append(ML.Transforms.Conversion.MapValue(keyValuePairs, new ColumnOptions[] { ("VecB", "TokenizeB") })); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } From 50b99e478460571a2a23b0ec0d0e2c455083d781 Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Mon, 18 Mar 2019 23:23:12 -0700 Subject: [PATCH 7/8] review comments --- .../Dynamic/TensorFlow/TextClassification.cs | 3 ++- .../Microsoft.ML.Samples/Dynamic/ValueMapping.cs | 12 +++++------- .../Dynamic/ValueMappingFloatToString.cs | 14 ++++++-------- .../Dynamic/ValueMappingStringToArray.cs | 12 +++++------- .../Dynamic/ValueMappingStringToKeyType.cs | 12 +++++------- .../Transforms/ConversionsExtensionsCatalog.cs | 16 ++++++++-------- .../TensorflowTests.cs | 3 ++- 7 files changed, 33 insertions(+), 39 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs index ab4d3f0f6c..3fe3f169ed 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -69,7 +69,8 @@ public static void Example() }; var model = 
mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text") - .Append(mlContext.Transforms.Conversion.MapValue("VariableLenghtFeatures", lookupMap, "Words", "Ids", "TokenizedWords")) + .Append(mlContext.Transforms.Conversion.MapValue("VariableLenghtFeatures", lookupMap, + lookupMap.Schema["Words"], lookupMap.Schema["Ids"], "TokenizedWords")) .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize")) .Append(tensorFlowModel.ScoreTensorFlowModel(new[] { "Prediction/Softmax" }, new[] { "Features" })) .Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax")) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs index f94cd1c375..2df356760a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs @@ -38,15 +38,13 @@ public static void Example() // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView // Creating a list of key-value pairs based on the Education values from the dataset. - var educationKeyValuePairs = new List>() - { - new KeyValuePair("0-5yrs", "Undergraduate"), - new KeyValuePair("6-11yrs", "Postgraduate"), - new KeyValuePair("12+yrs", "Postgraduate") - }; + var educationMap = new Dictionary (); + educationMap["0-5yrs"] = "Undergraduate"; + educationMap["6-11yrs"] = "Postgraduate"; + educationMap["12+yrs"] = "Postgraduate"; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue("EducationCategory", educationKeyValuePairs, "Education"); + var pipeline = mlContext.Transforms.Conversion.MapValue("EducationCategory", educationMap, "Education"); // Fits the ValueMappingEstimator and transforms the data converting the Education to EducationCategory. 
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs index 8e6f6af8e3..5cf34572ba 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs @@ -29,15 +29,13 @@ public static void Example() // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView // Creating a list of key-value pairs based on the induced value from the dataset - var temperatureKeyValuePairs = new List>() - { - new KeyValuePair(36.0f, "T1"), - new KeyValuePair(35.0f, "T2"), - new KeyValuePair(34.0f, "T3") - }; - + var temperatureMap = new Dictionary(); + temperatureMap[36.0f] = "T1"; + temperatureMap[35.0f] = "T2"; + temperatureMap[34.0f] = "T3"; + // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue("TemperatureCategory", temperatureKeyValuePairs, "Temperature"); + var pipeline = mlContext.Transforms.Conversion.MapValue("TemperatureCategory", temperatureMap, "Temperature"); // Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs index 8c11009a2f..f008d559d8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs @@ -32,15 +32,13 @@ public static void Example() // If the list of keys and values are known, they can be passed to the API. 
The ValueMappingEstimator can also get the mapping through an IDataView // Creating a list of key-value pairs based on the Education values from the dataset - var educationKeyValuePairs = new List>() - { - new KeyValuePair("0-5yrs", new int[] { 1,2,3 }), - new KeyValuePair("6-11yrs", new int[] { 1,2,3 }), - new KeyValuePair("12+yrs", new int[] { 1,2,3 }) - }; + var educationMap = new Dictionary(); + educationMap["0-5yrs"] = new int[] { 1, 2, 3 }; + educationMap["6-11yrs"] = new int[] { 5, 6, 7 }; + educationMap["12+yrs"] = new int[] { 42, 32, 64 }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue("EducationFeature", educationKeyValuePairs, "Education"); + var pipeline = mlContext.Transforms.Conversion.MapValue("EducationFeature", educationMap, "Education"); // Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs index 5d04b54917..8c01d35e78 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs @@ -36,17 +36,15 @@ public static void Example() // Creating a list of key-value pairs based on the Education values from the dataset // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. 
- var educationKeyValuePairs = new List>() - { - new KeyValuePair("0-5yrs", "Undergraduate"), - new KeyValuePair("6-11yrs", "Postgraduate"), - new KeyValuePair("12+yrs", "Postgraduate") - }; + var educationMap = new Dictionary(); + educationMap["0-5yrs"] = "Undergraduate"; + educationMap["6-11yrs"] = "Postgraduate"; + educationMap["12+yrs"] = "Postgraduate"; // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back // to the original value. - var pipeline = mlContext.Transforms.Conversion.MapValue("EducationKeyType", educationKeyValuePairs, "Education", true) + var pipeline = mlContext.Transforms.Conversion.MapValue("EducationKeyType", educationMap, "Education", true) .Append(mlContext.Transforms.Conversion.MapKeyToValue("EducationCategory", "EducationKeyType")); // Fits the ValueMappingEstimator and transforms the data adding the EducationKeyType column. diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index be716bd3f4..75bf80198d 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -327,7 +327,7 @@ internal static ValueMappingEstimator MapValue /// The conversion transform's catalog /// Name of the column resulting from the transformation of . - /// An instance of that contains the key and value columns. + /// An instance of that contains the and columns. /// The key column in . /// The value column in . /// Name of the column to transform. If set to , the value of the will be used as source. 
@@ -343,9 +343,9 @@ internal static ValueMappingEstimator MapValue public static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, - string outputColumnName, IDataView lookupMap, string keyColumn, string valueColumn, string inputColumnName = null) + string outputColumnName, IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, string inputColumnName = null) { - return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn, valueColumn, + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn.Name, valueColumn.Name, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); } @@ -353,9 +353,9 @@ public static ValueMappingEstimator MapValue( /// /// /// The conversion transform's catalog - /// An instance of that contains the key and value columns. - /// Name of the key column in . - /// Name of the value column in . + /// An instance of that contains the and columns. + /// Name of the key column in . + /// Name of the value column in . /// The columns to apply this transform on. 
/// A instance of the ValueMappingEstimator /// @@ -370,8 +370,8 @@ public static ValueMappingEstimator MapValue( [BestFriend] internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, - IDataView lookupMap, string keyColumnName, string valueColumnName, params ColumnOptions[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumnName, valueColumnName, + IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, params ColumnOptions[] columns) + => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn.Name, valueColumn.Name, ColumnOptions.ConvertToValueTuples(columns)); } } diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index b8c417b87b..87425b06c4 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -999,7 +999,8 @@ public void TensorFlowSentimentClassificationTest() // Then this integer vector is retrieved from the pipeline and resized to fixed length. // The second pipeline 'tfEnginePipe' takes the resized integer vector and passes it to TensoFlow and gets the classification scores. 
var estimator = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text") - .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", new ColumnOptions[] { ("Features", "TokenizedWords") })); + .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, lookupMap.Schema["Words"], lookupMap.Schema["Ids"], + new ColumnOptions[] { ("Features", "TokenizedWords") })); var model = estimator.Fit(dataView); var dataPipe = mlContext.Model.CreatePredictionEngine(model); From 5585ea00fa415b5820dcc283c00ce4a196c1d6bd Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Tue, 19 Mar 2019 11:11:02 -0700 Subject: [PATCH 8/8] review comments --- .../Transforms/ConversionsExtensionsCatalog.cs | 4 ++-- src/Microsoft.ML.Transforms/CategoricalCatalog.cs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 75bf80198d..80f24389fb 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -354,8 +354,8 @@ public static ValueMappingEstimator MapValue( /// /// The conversion transform's catalog /// An instance of that contains the and columns. - /// Name of the key column in . - /// Name of the value column in . + /// The key column in . + /// The value column in . /// The columns to apply this transform on. /// A instance of the ValueMappingEstimator /// diff --git a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs index 8ca938de27..22ebfe7890 100644 --- a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs +++ b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs @@ -13,7 +13,7 @@ namespace Microsoft.ML public static class CategoricalCatalog { /// - /// Convert several text column into one-hot encoded vectors. 
+ /// Convert text columns into one-hot encoded vectors. /// /// The transform catalog /// Name of the column resulting from the transformation of . @@ -69,6 +69,7 @@ internal static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Ca /// The transform catalog /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. + /// The conversion mode. /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Hashing seed. /// Whether the position of each term should be included in the hash. @@ -76,7 +77,6 @@ internal static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Ca /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - /// The conversion mode. public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCatalog.CategoricalTransforms catalog, string outputColumnName, string inputColumnName = null,