diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs
index 9d0a040591a..239e7d93ac0 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs
@@ -30,7 +30,7 @@ public static void Example()
 
             // A pipeline for featurizing the "Review" column
             var pipeline = ml.Transforms.Text.ProduceWordBags(review).
-                Append(ml.Transforms.Text.LatentDirichletAllocation(review, ldaFeatures, numberOfTopics:3));
+                Append(ml.Transforms.Text.LatentDirichletAllocation(review, ldaFeatures, numberOfTopics: 3));
 
             // The transformed data
             var transformer = pipeline.Fit(trainData);
diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs
index b473a88df9f..aec1bfe7634 100644
--- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs
+++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs
@@ -945,10 +945,10 @@ internal static class Defaults
         /// <param name="beta">Dirichlet prior on vocab-topic vectors.</param>
         /// <param name="samplingStepCount">Number of Metropolis Hasting step.</param>
         /// <param name="maximumNumberOfIterations">Number of iterations.</param>
-        /// <param name="likelihoodInterval">Compute log likelihood over local dataset on this iteration interval.</param>
         /// <param name="numberOfThreads">The number of training threads. Default value depends on number of logical processors.</param>
         /// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
         /// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
+        /// <param name="likelihoodInterval">Compute log likelihood over local dataset on this iteration interval.</param>
         /// <param name="numberOfBurninIterations">The number of burn-in iterations.</param>
         /// <param name="resetRandomGenerator">Reset the random number generator for each document.</param>
         internal LatentDirichletAllocationEstimator(IHostEnvironment env,
@@ -958,10 +958,10 @@ internal LatentDirichletAllocationEstimator(IHostEnvironment env,
             float beta = Defaults.Beta,
             int samplingStepCount = Defaults.SamplingStepCount,
             int maximumNumberOfIterations = Defaults.MaximumNumberOfIterations,
-            int likelihoodInterval = Defaults.LikelihoodInterval,
             int numberOfThreads = Defaults.NumberOfThreads,
             int maximumTokenCountPerDocument = Defaults.MaximumTokenCountPerDocument,
             int numberOfSummaryTermsPerTopic = Defaults.NumberOfSummaryTermsPerTopic,
+            int likelihoodInterval = Defaults.LikelihoodInterval,
             int numberOfBurninIterations = Defaults.NumberOfBurninIterations,
             bool resetRandomGenerator = Defaults.ResetRandomGenerator)
             : this(env, new[] { new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName,
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 7c9acd1265e..230ed970d46 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -580,16 +580,9 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T
         /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
         /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
         /// <param name="numberOfTopics">The number of topics.</param>
-        /// <param name="alphaSum">Dirichlet prior on document-topic vectors.</param>
-        /// <param name="beta">Dirichlet prior on vocab-topic vectors.</param>
-        /// <param name="samplingStepCount">Number of Metropolis Hasting step.</param>
         /// <param name="maximumNumberOfIterations">Number of iterations.</param>
-        /// <param name="likelihoodInterval">Compute log likelihood over local dataset on this iteration interval.</param>
-        /// <param name="numberOfThreads">The number of training threads. Default value depends on number of logical processors.</param>
         /// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
         /// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
-        /// <param name="numberOfBurninIterations">The number of burn-in iterations.</param>
-        /// <param name="resetRandomGenerator">Reset the random number generator for each document.</param>
         ///
         ///
         ///
             new LatentDirichletAllocationEstimator(CatalogUtils.GetEnvironment(catalog),
-                outputColumnName, inputColumnName, numberOfTopics, alphaSum, beta, samplingStepCount, maximumNumberOfIterations, likelihoodInterval, numberOfThreads,
-                maximumTokenCountPerDocument, numberOfSummaryTermsPerTopic, numberOfBurninIterations, resetRandomGenerator);
+                outputColumnName, inputColumnName, numberOfTopics,
+                LatentDirichletAllocationEstimator.Defaults.AlphaSum,
+                LatentDirichletAllocationEstimator.Defaults.Beta,
+                LatentDirichletAllocationEstimator.Defaults.SamplingStepCount,
+                maximumNumberOfIterations,
+                LatentDirichletAllocationEstimator.Defaults.NumberOfThreads,
+                maximumTokenCountPerDocument,
+                numberOfSummaryTermsPerTopic,
+                LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval,
+                LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations,
+                LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator);
 
         /// <summary>
         /// Uses LightLDA to transform a document (represented as a vector of floats)
diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs
index bb6a250b793..dc2e5a15848 100644
--- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs
+++ b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs
@@ -11,6 +11,7 @@
 using Microsoft.ML.Internal.Utilities;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Transforms;
+using Microsoft.ML.Transforms.Text;
 using Xunit;
 
 namespace Microsoft.ML.RunTests
@@ -1318,7 +1319,9 @@ public void TestLDATransform()
             builder.AddColumn("F1V", NumberDataViewType.Single, data);
             var srcView = builder.GetDataView();
 
-            var est = ML.Transforms.Text.LatentDirichletAllocation("F1V", numberOfTopics: 3, numberOfSummaryTermsPerTopic: 3, alphaSum: 3, numberOfThreads: 1, resetRandomGenerator: true);
+            var opt = new LatentDirichletAllocationEstimator.ColumnOptions(name: "F1V", numberOfTopics: 3,
+                numberOfSummaryTermsPerTopic: 3, alphaSum: 3, numberOfThreads: 1, resetRandomGenerator: true);
+            var est = ML.Transforms.Text.LatentDirichletAllocation(opt);
             var ldaTransformer = est.Fit(srcView);
             var transformedData = ldaTransformer.Transform(srcView);
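
For context, a minimal sketch of how the two call patterns differ once this change is applied. It reuses only names that appear in the diff above; the `MLContext` variable, column names, and parameter values are illustrative placeholders, and it assumes both overloads shown here are reachable from user code (the updated test may rely on internal visibility for the `ColumnOptions` overload).

```csharp
using Microsoft.ML;
using Microsoft.ML.Transforms.Text;

public static class LdaUsageSketch
{
    public static void Configure(MLContext ml)
    {
        // Simplified catalog overload: only the commonly tuned parameters remain
        // (outputColumnName, inputColumnName, numberOfTopics, maximumNumberOfIterations,
        // maximumTokenCountPerDocument, numberOfSummaryTermsPerTopic); everything else
        // falls back to LatentDirichletAllocationEstimator.Defaults.
        var simple = ml.Transforms.Text.LatentDirichletAllocation(
            outputColumnName: "LdaFeatures",
            inputColumnName: "Review",
            numberOfTopics: 3);

        // Advanced knobs (alphaSum, numberOfThreads, resetRandomGenerator, ...) go
        // through the ColumnOptions-based overload, mirroring the updated TestLDATransform.
        var options = new LatentDirichletAllocationEstimator.ColumnOptions(
            name: "LdaFeatures",
            numberOfTopics: 3,
            numberOfSummaryTermsPerTopic: 3,
            alphaSum: 3,
            numberOfThreads: 1,
            resetRandomGenerator: true);
        var advanced = ml.Transforms.Text.LatentDirichletAllocation(options);
    }
}
```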