diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs index 73512830086..9d0a040591a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs @@ -4,7 +4,7 @@ namespace Microsoft.ML.Samples.Dynamic { - public static class LdaTransform + public static class LatentDirichletAllocationTransform { public static void Example() { diff --git a/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs index c08d0aeed1b..25a2337c35c 100644 --- a/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs @@ -11,65 +11,65 @@ namespace Microsoft.ML.StaticPipe /// /// Information on the result of fitting a LDA transform. /// - public sealed class LdaFitResult + public sealed class LatentDirichletAllocationFitResult { /// /// For user defined delegates that accept instances of the containing type. /// /// - public delegate void OnFit(LdaFitResult result); + public delegate void OnFit(LatentDirichletAllocationFitResult result); public LatentDirichletAllocationTransformer.LdaSummary LdaTopicSummary; - public LdaFitResult(LatentDirichletAllocationTransformer.LdaSummary ldaTopicSummary) + public LatentDirichletAllocationFitResult(LatentDirichletAllocationTransformer.LdaSummary ldaTopicSummary) { LdaTopicSummary = ldaTopicSummary; } } - public static class LdaStaticExtensions + public static class LatentDirichletAllocationStaticExtensions { private struct Config { - public readonly int NumTopic; + public readonly int NumberOfTopics; public readonly Single AlphaSum; public readonly Single Beta; - public readonly int MHStep; - public readonly int NumIter; + public readonly int SamplingStepCount; + public readonly int MaximumNumberOfIterations; public readonly int LikelihoodInterval; - public readonly int NumThread; - public readonly int NumMaxDocToken; - public readonly int NumSummaryTermPerTopic; - public readonly int NumBurninIter; + public readonly int NumberOfThreads; + public readonly int MaximumTokenCountPerDocument; + public readonly int NumberOfSummaryTermsPerTopic; + public readonly int NumberOfBurninIterations; public readonly bool ResetRandomGenerator; public readonly Action OnFit; - public Config(int numTopic, Single alphaSum, Single beta, int mhStep, int numIter, int likelihoodInterval, - int numThread, int numMaxDocToken, int numSummaryTermPerTopic, int numBurninIter, bool resetRandomGenerator, + public Config(int numberOfTopics, Single alphaSum, Single beta, int samplingStepCount, int maximumNumberOfIterations, int likelihoodInterval, + int numberOfThreads, int maximumTokenCountPerDocument, int numberOfSummaryTermsPerTopic, int numberOfBurninIterations, bool resetRandomGenerator, Action onFit) { - NumTopic = numTopic; + NumberOfTopics = numberOfTopics; AlphaSum = alphaSum; Beta = beta; - MHStep = mhStep; - NumIter = numIter; + SamplingStepCount = samplingStepCount; + MaximumNumberOfIterations = maximumNumberOfIterations; LikelihoodInterval = likelihoodInterval; - NumThread = numThread; - NumMaxDocToken = numMaxDocToken; - NumSummaryTermPerTopic = numSummaryTermPerTopic; - NumBurninIter = numBurninIter; + NumberOfThreads = numberOfThreads; + MaximumTokenCountPerDocument = maximumTokenCountPerDocument; + NumberOfSummaryTermsPerTopic = numberOfSummaryTermsPerTopic; + NumberOfBurninIterations = numberOfBurninIterations; ResetRandomGenerator = resetRandomGenerator; OnFit = onFit; } } - private static Action Wrap(LdaFitResult.OnFit onFit) + private static Action Wrap(LatentDirichletAllocationFitResult.OnFit onFit) { if (onFit == null) return null; - return ldaTopicSummary => onFit(new LdaFitResult(ldaTopicSummary)); + return ldaTopicSummary => onFit(new LatentDirichletAllocationFitResult(ldaTopicSummary)); } private interface ILdaCol @@ -107,16 +107,16 @@ public override IEstimator Reconcile(IHostEnvironment env, infos[i] = new LatentDirichletAllocationEstimator.ColumnOptions(outputNames[toOutput[i]], inputNames[tcol.Input], - tcol.Config.NumTopic, + tcol.Config.NumberOfTopics, tcol.Config.AlphaSum, tcol.Config.Beta, - tcol.Config.MHStep, - tcol.Config.NumIter, + tcol.Config.SamplingStepCount, + tcol.Config.MaximumNumberOfIterations, tcol.Config.LikelihoodInterval, - tcol.Config.NumThread, - tcol.Config.NumMaxDocToken, - tcol.Config.NumSummaryTermPerTopic, - tcol.Config.NumBurninIter, + tcol.Config.NumberOfThreads, + tcol.Config.MaximumTokenCountPerDocument, + tcol.Config.NumberOfSummaryTermsPerTopic, + tcol.Config.NumberOfBurninIterations, tcol.Config.ResetRandomGenerator); if (tcol.Config.OnFit != null) @@ -136,36 +136,36 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// A vector of floats representing the document. - /// The number of topics. + /// The number of topics. /// Dirichlet prior on document-topic vectors. /// Dirichlet prior on vocab-topic vectors. - /// Number of Metropolis Hasting step. - /// Number of iterations. + /// Number of Metropolis Hasting step. + /// Number of iterations. /// Compute log likelihood over local dataset on this iteration interval. - /// The number of training threads. Default value depends on number of logical processors. - /// The threshold of maximum count of tokens per doc. - /// The number of words to summarize the topic. - /// The number of burn-in iterations. + /// The number of training threads. Default value depends on number of logical processors. + /// The threshold of maximum count of tokens per doc. + /// The number of words to summarize the topic. + /// The number of burn-in iterations. /// Reset the random number generator for each document. /// Called upon fitting with the learnt enumeration on the dataset. - public static Vector ToLdaTopicVector(this Vector input, - int numTopic = LatentDirichletAllocationEstimator.Defaults.NumberOfTopics, + public static Vector ToLatentDirichletAllocationTopicVector(this Vector input, + int numberOfTopics = LatentDirichletAllocationEstimator.Defaults.NumberOfTopics, Single alphaSum = LatentDirichletAllocationEstimator.Defaults.AlphaSum, Single beta = LatentDirichletAllocationEstimator.Defaults.Beta, - int mhstep = LatentDirichletAllocationEstimator.Defaults.SamplingStepCount, - int numIterations = LatentDirichletAllocationEstimator.Defaults.MaximumNumberOfIterations, + int samplingStepCount = LatentDirichletAllocationEstimator.Defaults.SamplingStepCount, + int maximumNumberOfIterations = LatentDirichletAllocationEstimator.Defaults.MaximumNumberOfIterations, int likelihoodInterval = LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval, - int numThreads = LatentDirichletAllocationEstimator.Defaults.NumThreads, - int numMaxDocToken = LatentDirichletAllocationEstimator.Defaults.NumMaxDocToken, - int numSummaryTermPerTopic = LatentDirichletAllocationEstimator.Defaults.NumSummaryTermPerTopic, - int numBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumBurninIterations, + int numberOfThreads = LatentDirichletAllocationEstimator.Defaults.NumThreads, + int maximumTokenCountPerDocument = LatentDirichletAllocationEstimator.Defaults.NumMaxDocToken, + int numberOfSummaryTermsPerTopic = LatentDirichletAllocationEstimator.Defaults.NumSummaryTermPerTopic, + int numberOfBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumBurninIterations, bool resetRandomGenerator = LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator, - LdaFitResult.OnFit onFit = null) + LatentDirichletAllocationFitResult.OnFit onFit = null) { Contracts.CheckValue(input, nameof(input)); return new ImplVector(input, - new Config(numTopic, alphaSum, beta, mhstep, numIterations, likelihoodInterval, numThreads, numMaxDocToken, numSummaryTermPerTopic, - numBurninIterations, resetRandomGenerator, Wrap(onFit))); + new Config(numberOfTopics, alphaSum, beta, samplingStepCount, maximumNumberOfIterations, likelihoodInterval, numberOfThreads, maximumTokenCountPerDocument, numberOfSummaryTermsPerTopic, + numberOfBurninIterations, resetRandomGenerator, Wrap(onFit))); } } } \ No newline at end of file diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index 442e41c1caa..8ff5752def0 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -14,7 +14,6 @@ using Microsoft.ML.EntryPoints; using Microsoft.ML.Internal.Internallearn; using Microsoft.ML.Internal.Utilities; -using Microsoft.ML.Model; using Microsoft.ML.TextAnalytics; using Microsoft.ML.Transforms.Text; diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index 0c6fb3c5105..8197b274d13 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -674,7 +674,7 @@ public void LdaTopicModel() var est = data.MakeNewEstimator() .Append(r => ( r.label, - topics: r.text.ToBagofWords().ToLdaTopicVector(numTopic: 3, numSummaryTermPerTopic:5, alphaSum: 10, onFit: m => ldaSummary = m.LdaTopicSummary))); + topics: r.text.ToBagofWords().ToLatentDirichletAllocationTopicVector(numberOfTopics: 3, numberOfSummaryTermsPerTopic:5, alphaSum: 10, onFit: m => ldaSummary = m.LdaTopicSummary))); var transformer = est.Fit(data); var tdata = transformer.Transform(data);