From 803bf57d0348c0842447beb90d18c9c1e5784792 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Tue, 15 Oct 2024 09:54:19 -0700 Subject: [PATCH 1/9] Implement leave-one-out test --- .../DomainServices/AnalysisGuessServices.cs | 10 ++++ .../AnalysisGuessServicesTests.cs | 54 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs index d74f3b1e..eedb2016 100644 --- a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs +++ b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs @@ -59,6 +59,8 @@ public enum OpinionAgent Human = 1, } + public AnalysisOccurrence IgnoreOccurrence { get; set; } + LcmCache Cache { get; set; } // PriorityCount provides a count of the number of times an analysis @@ -400,6 +402,11 @@ private Dictionary> GetAnalysisC { IAnalysis analysis = seg.AnalysesRS[i]; if (analysis.Wordform != wordform) continue; + if (IgnoreOccurrence != null && IgnoreOccurrence.Segment == seg && IgnoreOccurrence.Index == i) + { + // Leave this occurrence out. + continue; + } IAnalysis previous = GetPreviousWordform(seg, i); if (analysis is IWfiGloss) { @@ -413,6 +420,9 @@ private Dictionary> GetAnalysisC } } } + if (IgnoreOccurrence != null) + // Only include selected analyses. + return counts; // Include analyses that may not have been selected. foreach (IWfiAnalysis analysis in wordform.AnalysesOC) { diff --git a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs index c8a5d460..30be4d85 100644 --- a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs +++ b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs @@ -2,7 +2,9 @@ // This software is licensed under the LGPL, version 2.1 or later // (http://www.gnu.org/licenses/lgpl-2.1.html) +using System; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.Linq; using NUnit.Framework; @@ -1443,5 +1445,57 @@ public void ExpectedContextAwareGloss_PreferTwoContextedOverOneContexted() Assert.AreEqual(uncontextedApprovedGloss, sorted_glosses[2]); } } + + [Test] + public void TestPrioritizerProject() + { + TestProject( + "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization", + "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization\\Test prioritization.fwdata" + ); + } + + private void TestProject(string projectsDirectory, string dbFileName) + { + var projectId = new TestProjectId(BackendProviderType.kXML, dbFileName); + var m_ui = new DummyLcmUI(); + var m_lcmDirectories = new TestLcmDirectories(projectsDirectory); + int total = 0; + int correct = 0; + using (var cache = LcmCache.CreateCacheFromExistingData(projectId, "en", m_ui, m_lcmDirectories, new LcmSettings(), + new DummyProgressDlg())) + { + AnalysisGuessServices guesser = new AnalysisGuessServices(cache); + IStTextRepository textRepository = cache.ServiceLocator.GetInstance(); + foreach (IStText text in textRepository.AllInstances()) + { + foreach (IStTxtPara para in text.ParagraphsOS) + { + foreach (var occurrence in SegmentServices.StTextAnnotationNavigator.GetWordformOccurrencesAdvancingInPara(para)) + { + var analysis = occurrence.Analysis; + if (analysis is IWfiGloss) + { + NonUndoableUnitOfWorkHelper.DoUsingNewOrCurrentUOW(cache.ActionHandlerAccessor, () => + { + guesser.ClearGuessData(); + guesser.IgnoreOccurrence = occurrence; + occurrence.Analysis = analysis.Wordform; + var bestGuess = guesser.GetBestGuess(occurrence); + occurrence.Analysis = analysis; + total++; + if (bestGuess == analysis) + correct++; + }); + } + } + } + } + } + float ratio = total == 0 ? 0 : (float)correct / (float)total; + Console.WriteLine("correct: " + correct.ToString() + ", total: " + total.ToString() + " (" + (100 * ratio).ToString() + "%)"); + Assert.AreEqual(52, total); + Assert.AreEqual(26, correct); + } } } From ac9df5200ea1c87d4fd29e500073253908568dbb Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Tue, 15 Oct 2024 11:35:43 -0700 Subject: [PATCH 2/9] Add ContextCount --- .../DomainServices/AnalysisGuessServices.cs | 117 +++++++++--------- 1 file changed, 58 insertions(+), 59 deletions(-) diff --git a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs index eedb2016..8b61e379 100644 --- a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs +++ b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs @@ -9,14 +9,9 @@ // using System.Collections.Generic; -using System.Configuration; -using System.Diagnostics; using System.Linq; -using Icu; using SIL.LCModel.Core.KernelInterfaces; using SIL.LCModel.Core.Text; -using SIL.LCModel.Core.WritingSystems; -using SIL.LCModel.DomainImpl; using SIL.LCModel.Infrastructure; @@ -73,29 +68,37 @@ class PriorityCount public int count = 0; } - // First key of m_guessTable = word form (or analysis). - // Second key of m_guessTable = previous word form (including m_nullWAG for unknown). - // Final value of m_guessTable = default analysis (or gloss). - private IDictionary> m_guessTable; - IDictionary> GuessTable + class ContextCount + { + // First key is the previous/next wordform. + // Second key is an analysis that occurs in that context. + // The PriorityCount is the count for the analysis. + public IDictionary> previousWordform; + public IDictionary> nextWordform; + } + + // Key of m_guessTable = word form (or analysis). + // Value of m_guessTable = context counts for word form. + private IDictionary m_guessTable; + IDictionary GuessTable { get { if (m_guessTable == null) - GuessTable = new Dictionary>(); + GuessTable = new Dictionary(); return m_guessTable; } set { m_guessTable = value; } } // CaselessGuessTable is like GuessTable, but for uppercase word forms that can have lowercase analyses. - private IDictionary> m_caselessGuessTable; - IDictionary> CaselessGuessTable + private IDictionary m_caselessGuessTable; + IDictionary CaselessGuessTable { get { if (m_caselessGuessTable == null) - CaselessGuessTable = new Dictionary>(); + CaselessGuessTable = new Dictionary(); return m_caselessGuessTable; } set { m_caselessGuessTable = value; } @@ -300,25 +303,27 @@ public bool IsParserDisapproved(IWfiAnalysis candidate) /// bool private bool TryGetContextAwareGuess(IAnalysis form, IWfiWordform lowercaseForm, IAnalysis previous, out IAnalysis analysis) { - IDictionary> guessTable = lowercaseForm != null ? CaselessGuessTable : GuessTable; + IDictionary guessTable = lowercaseForm != null ? CaselessGuessTable : GuessTable; if (!guessTable.ContainsKey(form)) { // Fill in GuessTable. - guessTable[form] = GetDefaultAnalyses(form, lowercaseForm); + guessTable[form] = GetContextCounts(form); + if (lowercaseForm != null) + GetContextCounts(lowercaseForm, true, guessTable[form]); } - if (!guessTable[form].ContainsKey(previous)) + if (!guessTable[form].previousWordform.ContainsKey(previous)) { // back off to all forms. previous = m_nullWAG; - if (!guessTable[form].ContainsKey(previous)) + if (!guessTable[form].previousWordform.ContainsKey(previous)) { // form doesn't occur in the interlinear texts. analysis = m_nullWAG; return false; } } - analysis = guessTable[form][previous]; + analysis = GetBestAnalysis(guessTable[form], previous); if (analysis == null) return false; if (analysis is IWfiAnalysis) @@ -333,51 +338,45 @@ private bool TryGetContextAwareGuess(IAnalysis form, IWfiWordform lowercaseForm, } /// - /// Get the default analyses for the given form in the context of the previous word form. - /// If lowercaseForm is given, then include its analyses, too. - /// If form is an analysis,then the default analyses are glosses. - /// Uses m_emptyWAG as previous word form for the first analysis in a segment. - /// Uses m_nullWAG as previous word form when unknown. + /// Get the best analysis from counts given previous. + /// + /// + /// + /// + private IAnalysis GetBestAnalysis(ContextCount counts, IAnalysis previous) + { + IAnalysis best = null; + foreach (IAnalysis key in counts.previousWordform[previous].Keys) + { + if (best == null || ComparePriorityCounts(key, best, previous, counts.previousWordform) < 0) + { + best = key; + } + } + return best; + } + + /// + /// Get the context counts for form. /// - /// the form that you want analyses for - /// lowercase version of form - /// Dictionary - private Dictionary GetDefaultAnalyses(IAnalysis form, IWfiWordform lowercaseForm) + /// + /// whether form is lowercased + /// existing context counts + /// + private ContextCount GetContextCounts(IAnalysis form, bool lowercased = false, ContextCount counts = null) { - Dictionary defaults = new Dictionary(); - Dictionary> counts = null; + if (counts == null) + counts = new ContextCount(); if (form is IWfiWordform wordform) { - // Get default analyses. - counts = GetAnalysisCounts(wordform); - if (lowercaseForm != null) - // Add lowercase analyses to counts. - GetAnalysisCounts(lowercaseForm, true, counts); + counts.previousWordform = GetAnalysisCounts(wordform, lowercased, counts.previousWordform); } else if (form is IWfiAnalysis analysis) { // Get default glosses. - counts = GetGlossCounts(analysis); - } - if (counts != null) - { - // Get the best analysis for each key in counts. - foreach (IAnalysis previous in counts.Keys) - { - IAnalysis best = null; - // Use counts[previous].Keys instead of wordform.AnalysesOC - // because counts[previous].Keys may include lowercase analyses. - foreach (IAnalysis key in counts[previous].Keys) - { - if (best == null || ComparePriorityCounts(key, best, previous, counts) < 0) - { - best = key; - defaults[previous] = best; - } - } - } + counts.previousWordform = GetGlossCounts(analysis); } - return defaults; + return counts; } /// @@ -388,8 +387,8 @@ private Dictionary GetDefaultAnalyses(IAnalysis form, IWfi /// /// the form that you want an analysis for /// Dictionary> - private Dictionary> GetAnalysisCounts(IWfiWordform wordform, bool lowercased = false, - Dictionary> counts = null) + private IDictionary> GetAnalysisCounts(IWfiWordform wordform, bool lowercased = false, + IDictionary> counts = null) { if (counts == null) counts = new Dictionary>(); @@ -511,7 +510,7 @@ private IAnalysis GetPreviousWordform(ISegment seg, int i) /// the dictionary of counts being incremented /// void private void AddAnalysisCount(IAnalysis previous, IAnalysis analysis, int priority, bool lowercased, - Dictionary> counts) + IDictionary> counts) { if (previous != m_nullWAG) { @@ -548,7 +547,7 @@ private void AddAnalysisCount(IAnalysis previous, IAnalysis analysis, int priori /// Sort in descending order. /// private int ComparePriorityCounts(IAnalysis a1, IAnalysis a2, IAnalysis previous, - Dictionary> counts) + IDictionary> counts) { // Check for existence of previous. if (!counts.ContainsKey(previous)) From e189c4e800b74e1ba34eeccaed06716a236cc356 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Wed, 16 Oct 2024 09:33:27 -0700 Subject: [PATCH 3/9] Push occurrence down to ComparePriorityCounts --- .../DomainServices/AnalysisGuessServices.cs | 225 ++++++++++-------- .../AnalysisGuessServicesTests.cs | 3 +- 2 files changed, 122 insertions(+), 106 deletions(-) diff --git a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs index 8b61e379..dd86a794 100644 --- a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs +++ b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs @@ -8,6 +8,7 @@ // // +using System; using System.Collections.Generic; using System.Linq; using SIL.LCModel.Core.KernelInterfaces; @@ -290,7 +291,7 @@ public bool IsParserDisapproved(IWfiAnalysis candidate) } /// - /// Try to get the default analysis for form in the context of its previous word form. + /// Try to get the default analysis for form in the context of its occurrence. /// If form is an analysis,then the result is a gloss. /// If form is a wordform, then try to get the default gloss of the default analysis if it exists. /// Use m_emptyWAG as the previous word form for the first analysis in a segment. @@ -298,38 +299,19 @@ public bool IsParserDisapproved(IWfiAnalysis candidate) /// /// the form that you want an analysis for /// the lowercase version of form if its analyses should be included - /// the context of the form + /// where the form occurs (used for context) /// the resulting analysis /// bool - private bool TryGetContextAwareGuess(IAnalysis form, IWfiWordform lowercaseForm, IAnalysis previous, out IAnalysis analysis) + private bool TryGetContextAwareGuess(IAnalysis form, IWfiWordform lowercaseForm, AnalysisOccurrence occurrence, out IAnalysis analysis) { - IDictionary guessTable = lowercaseForm != null ? CaselessGuessTable : GuessTable; - - if (!guessTable.ContainsKey(form)) - { - // Fill in GuessTable. - guessTable[form] = GetContextCounts(form); - if (lowercaseForm != null) - GetContextCounts(lowercaseForm, true, guessTable[form]); - } - if (!guessTable[form].previousWordform.ContainsKey(previous)) - { - // back off to all forms. - previous = m_nullWAG; - if (!guessTable[form].previousWordform.ContainsKey(previous)) - { - // form doesn't occur in the interlinear texts. - analysis = m_nullWAG; - return false; - } - } - analysis = GetBestAnalysis(guessTable[form], previous); + IDictionary guessTable = GetGuessTable(form, lowercaseForm); + analysis = GetBestAnalysis(guessTable[form], occurrence); if (analysis == null) return false; if (analysis is IWfiAnalysis) { // Get the best gloss for analysis. - if (TryGetContextAwareGuess(analysis, null, previous, out IAnalysis gloss)) + if (TryGetContextAwareGuess(analysis, null, occurrence, out IAnalysis gloss)) { analysis = gloss; } @@ -337,18 +319,37 @@ private bool TryGetContextAwareGuess(IAnalysis form, IWfiWordform lowercaseForm, return true; } + private IDictionary GetGuessTable(IAnalysis form, IWfiWordform lowercaseForm) + { + IDictionary guessTable = lowercaseForm != null ? CaselessGuessTable : GuessTable; + + if (!guessTable.ContainsKey(form)) + { + // Fill in GuessTable. + guessTable[form] = GetContextCounts(form); + if (lowercaseForm != null) + GetContextCounts(lowercaseForm, true, guessTable[form]); + } + return guessTable; + } + /// - /// Get the best analysis from counts given previous. + /// Get the best analysis from counts given context of occurrence. /// /// - /// + /// /// - private IAnalysis GetBestAnalysis(ContextCount counts, IAnalysis previous) + private IAnalysis GetBestAnalysis(ContextCount counts, AnalysisOccurrence occurrence) { IAnalysis best = null; - foreach (IAnalysis key in counts.previousWordform[previous].Keys) + if (!counts.previousWordform.ContainsKey(m_nullWAG)) { - if (best == null || ComparePriorityCounts(key, best, previous, counts.previousWordform) < 0) + // No analyses to enumerate. + return null; + } + foreach (IAnalysis key in counts.previousWordform[m_nullWAG].Keys) + { + if (best == null || ComparePriorityCounts(key, best, occurrence, counts) < 0) { best = key; } @@ -369,26 +370,26 @@ private ContextCount GetContextCounts(IAnalysis form, bool lowercased = false, C counts = new ContextCount(); if (form is IWfiWordform wordform) { - counts.previousWordform = GetAnalysisCounts(wordform, lowercased, counts.previousWordform); + counts.previousWordform = GetAnalysisCounts(wordform, lowercased, true, counts.previousWordform); + counts.nextWordform = GetAnalysisCounts(wordform, lowercased, false, counts.nextWordform); } else if (form is IWfiAnalysis analysis) { // Get default glosses. - counts.previousWordform = GetGlossCounts(analysis); + counts.previousWordform = GetGlossCounts(analysis, true); + counts.nextWordform = GetGlossCounts(analysis, false); } return counts; } /// - /// Get analysis counts for the given word form in the context of the previous word form. + /// Get analysis counts for the given word form in its context. /// Uses m_emptyWAG as previous word form for the first analysis in a segment. /// Uses m_nullWAG as previous word form when unknown. /// This is used by GetBestGuess for word forms and GetSortedAnalysisGuesses. /// - /// the form that you want an analysis for - /// Dictionary> private IDictionary> GetAnalysisCounts(IWfiWordform wordform, bool lowercased = false, - IDictionary> counts = null) + bool previous = true, IDictionary> counts = null) { if (counts == null) counts = new Dictionary>(); @@ -406,7 +407,7 @@ private IDictionary> GetAnalysis // Leave this occurrence out. continue; } - IAnalysis previous = GetPreviousWordform(seg, i); + IAnalysis adjacent = GetAdjacentWordform(seg, i, previous); if (analysis is IWfiGloss) { // Get analysis for gloss. @@ -415,7 +416,7 @@ private IDictionary> GetAnalysis if (analysis is IWfiAnalysis) { // Add high priority count to analysis. - AddAnalysisCount(previous, analysis, 7, lowercased, counts); + AddAnalysisCount(adjacent, analysis, 7, lowercased, counts); } } } @@ -440,15 +441,14 @@ private IDictionary> GetAnalysis } /// - /// Get gloss counts for the given analysis in the context of the previous word form. - /// If form is an analysis,then the analysis counts are for glosses. + /// Get gloss counts for the given analysis in its context. /// Uses m_emptyWAG as previous word form for the first analysis in a segment. /// Uses m_nullWAG as previous word form when unknown. /// This is used by GetBestGuess for analyses and GetSortedGlossGuesses. /// /// the analysis that you want a gloss for /// Dictionary> - private Dictionary> GetGlossCounts(IWfiAnalysis analysis) + private Dictionary> GetGlossCounts(IWfiAnalysis analysis, bool previous) { var counts = new Dictionary>(); var segs = new HashSet(); @@ -461,14 +461,14 @@ private Dictionary> GetGlossCoun for (int i = 0; i < seg.AnalysesRS.Count; i++) { // Get gloss for analysis. - IAnalysis previous = GetPreviousWordform(seg, i); + IAnalysis adjacent = GetAdjacentWordform(seg, i, previous); IAnalysis gloss = seg.AnalysesRS[i]; if (gloss is IWfiGloss) { if (gloss.Analysis == analysis) { // Add high priority count to gloss. - AddAnalysisCount(previous, gloss, 2, false, counts); + AddAnalysisCount(adjacent, gloss, 2, false, counts); } } } @@ -501,54 +501,76 @@ private IAnalysis GetPreviousWordform(ISegment seg, int i) } /// - /// Add a count to counts for analysis with the given previous word form and the given priority. + /// Get the adjacent word form of a location. /// - /// the previous word form + /// the segment of the location + /// the index of the location + /// IAnalysis + private IAnalysis GetAdjacentWordform(ISegment seg, int i, bool previous) + { + if (previous && i == 0) + return m_emptyWAG; + if (!previous && i == seg.AnalysesRS.Count - 1) + return m_emptyWAG; + int j = previous ? i - 1 : i + 1; + IAnalysis adjacent = seg.AnalysesRS[j]; + if (adjacent is IWfiAnalysis || adjacent is IWfiGloss) + { + adjacent = adjacent.Wordform; + } + // Should be IWfiWordform or IPunctuationForm. + return adjacent; + } + + /// + /// Add a count to counts for analysis with the given context word form and the given priority. + /// + /// the context word form /// the analysis being counted /// the priority of the count /// whether the word form of the analysis was lowercased /// the dictionary of counts being incremented /// void - private void AddAnalysisCount(IAnalysis previous, IAnalysis analysis, int priority, bool lowercased, + private void AddAnalysisCount(IAnalysis context, IAnalysis analysis, int priority, bool lowercased, IDictionary> counts) { - if (previous != m_nullWAG) + if (context != m_nullWAG) { // Record count for unknown/backoff. AddAnalysisCount(m_nullWAG, analysis, priority, lowercased, counts); } - if (!counts.ContainsKey(previous)) + if (!counts.ContainsKey(context)) { - counts[previous] = new Dictionary(); + counts[context] = new Dictionary(); } - if (!counts[previous].ContainsKey(analysis)) + if (!counts[context].ContainsKey(analysis)) { - counts[previous][analysis] = new PriorityCount(); + counts[context][analysis] = new PriorityCount(); } - if (counts[previous][analysis].priority > priority) + if (counts[context][analysis].priority > priority) { // Ignore this count because its priority is too low. return; } - if (counts[previous][analysis].priority < priority) + if (counts[context][analysis].priority < priority) { // Start a new priority count. - counts[previous][analysis].priority = priority; - counts[previous][analysis].lowercased = lowercased; - counts[previous][analysis].count = 0; + counts[context][analysis].priority = priority; + counts[context][analysis].lowercased = lowercased; + counts[context][analysis].count = 0; } // Increment count. - counts[previous][analysis].count += 1; + counts[context][analysis].count += 1; } /// - /// Compare the priority counts for a1 and a2 based on - /// the previous wordform and a dictionary of counts. + /// Compare the priority counts for a1 and a2 based on the context of the occurrence. /// Sort in descending order. /// - private int ComparePriorityCounts(IAnalysis a1, IAnalysis a2, IAnalysis previous, - IDictionary> counts) + private int ComparePriorityCounts(IAnalysis a1, IAnalysis a2, AnalysisOccurrence occurrence, ContextCount contextCount) { + IAnalysis previous = occurrence != null ? GetPreviousWordform(occurrence.Segment, occurrence.Index) : m_nullWAG; + IDictionary> counts = contextCount.previousWordform; // Check for existence of previous. if (!counts.ContainsKey(previous)) { @@ -614,7 +636,7 @@ public IAnalysis GetBestGuess(IWfiWordform wf, int ws) if (!EntryGenerated(wf)) GenerateEntryGuesses(wf, ws); IAnalysis wag; - if (TryGetContextAwareGuess(wf, null, m_nullWAG, out wag)) + if (TryGetContextAwareGuess(wf, null, null, out wag)) return wag; return new NullWAG(); } @@ -627,7 +649,7 @@ public IAnalysis GetBestGuess(IWfiWordform wf, int ws) public IAnalysis GetBestGuess(IWfiAnalysis wa) { IAnalysis wag; - if (TryGetContextAwareGuess(wa, null, m_nullWAG, out wag)) + if (TryGetContextAwareGuess(wa, null, null, out wag)) return wag; return new NullWAG(); } @@ -641,34 +663,27 @@ public IAnalysis GetBestGuess(IWfiAnalysis wa) /// False: Do lowercase matching regardless of the occurrence index. /// /// - /// True: Consider previous word when getting best guess. - /// False: Ignore previous word when getting best guess. + /// True: Consider context when getting best guess. + /// False: Ignore context when getting best guess. /// public IAnalysis GetBestGuess(AnalysisOccurrence occurrence, bool onlyIndexZeroLowercaseMatching = true, bool includeContext = true) { - // first see if there is a relevant lowercase form of a sentence initial (non-lowercase) wordform - // TODO: make it look for the first word in the sentence...may not be at Index 0! - IWfiWordform lowercaseWf = null; int ws = occurrence.BaselineWs; - if (occurrence.Analysis is IWfiWordform wordform) - { - if (!EntryGenerated(wordform)) - GenerateEntryGuesses(wordform, ws); - if (!onlyIndexZeroLowercaseMatching || occurrence.Index == 0) - { - lowercaseWf = GetLowercaseWordform(occurrence, ws); - if (lowercaseWf != null && lowercaseWf != wordform) - { - if (!EntryGenerated(lowercaseWf)) - GenerateEntryGuesses(lowercaseWf, ws); - } - } - } if (ws == -1) return new NullWAG(); // happens with empty translation lines + + IWfiWordform lowercaseWf = null; + if (occurrence.Analysis is IWfiWordform wordform) + { + lowercaseWf = GetLowercaseWordform(occurrence, ws, onlyIndexZeroLowercaseMatching, wordform); + // Generate entries if necessary. + if (!EntryGenerated(wordform)) + GenerateEntryGuesses(wordform, ws); + if (lowercaseWf != null && !EntryGenerated(lowercaseWf)) + GenerateEntryGuesses(lowercaseWf, ws); + } IAnalysis bestGuess; - IAnalysis previous = includeContext ? GetPreviousWordform(occurrence.Segment, occurrence.Index) : m_nullWAG; - if (TryGetContextAwareGuess(occurrence.Analysis, lowercaseWf, previous, out bestGuess)) + if (TryGetContextAwareGuess(occurrence.Analysis, lowercaseWf, includeContext ? occurrence : null, out bestGuess)) return bestGuess; return new NullWAG(); } @@ -676,13 +691,19 @@ public IAnalysis GetBestGuess(AnalysisOccurrence occurrence, bool onlyIndexZeroL /// /// Get the lowercase word form if the occurrence is uppercase. /// - private IWfiWordform GetLowercaseWordform(AnalysisOccurrence occurrence, int ws) + private IWfiWordform GetLowercaseWordform(AnalysisOccurrence occurrence, int ws, + bool onlyIndexZeroLowercaseMatching, IWfiWordform wordform) { + // TODO: make it look for the first word in the sentence...may not be at Index 0! + if (occurrence == null) + return null; + if (onlyIndexZeroLowercaseMatching && occurrence.Index != 0) + return null; ITsString tssWfBaseline = occurrence.BaselineText; var cf = new CaseFunctions(Cache.ServiceLocator.WritingSystemManager.Get(ws)); string sLower = cf.ToLower(tssWfBaseline.Text); // don't bother looking up the lowercased wordform if the instanceOf is already in lowercase form. - if (sLower != tssWfBaseline.Text) + if (sLower != wordform.ShortName) { return GetWordformIfNeeded(sLower, ws); } @@ -808,26 +829,22 @@ private List GetSortedAnalysisGuesses(IWfiWordform wordform, int w if (originalCaseWf != null) wordform = originalCaseWf; } + IWfiWordform lowercaseWf = GetLowercaseWordform(occurrence, ws, onlyIndexZeroLowercaseMatching, wordform); + // Generate entries if necessary. if (!EntryGenerated(wordform)) GenerateEntryGuesses(wordform, ws); + if (lowercaseWf != null && !EntryGenerated(lowercaseWf)) + GenerateEntryGuesses(lowercaseWf, ws); - var counts = GetAnalysisCounts(wordform); + // Get analyses to sort. List analyses = wordform.AnalysesOC.ToList(); - if (occurrence != null && (!onlyIndexZeroLowercaseMatching || occurrence.Index == 0)) - { - IWfiWordform lowercaseWf = GetLowercaseWordform(occurrence, ws); - if (lowercaseWf != null && lowercaseWf != wordform) - { - // Add lowercase analyses. - if (!EntryGenerated(lowercaseWf)) - GenerateEntryGuesses(lowercaseWf, ws); - GetAnalysisCounts(lowercaseWf, true, counts); - analyses.AddRange(lowercaseWf.AnalysesOC); - } - } - var previous = occurrence == null ? m_nullWAG : GetPreviousWordform(occurrence.Segment, occurrence.Index); - analyses.Sort((x, y) => ComparePriorityCounts(x, y, previous, counts)); + if (lowercaseWf != null) + analyses.AddRange(lowercaseWf.AnalysesOC); + + // Sort analyses. + IDictionary guessTable = GetGuessTable(wordform, lowercaseWf); + analyses.Sort((x, y) => ComparePriorityCounts(x, y, occurrence, guessTable[wordform])); return analyses; } @@ -836,12 +853,12 @@ private List GetSortedAnalysisGuesses(IWfiWordform wordform, int w /// public List GetSortedGlossGuesses(IWfiAnalysis analysis, AnalysisOccurrence occurrence = null) { - var counts = GetGlossCounts(analysis); - var previous = occurrence == null ? m_nullWAG : GetPreviousWordform(occurrence.Segment, occurrence.Index); List glosses = analysis.MeaningsOC.ToList(); - glosses.Sort((x, y) => ComparePriorityCounts(x, y, previous, counts)); + IDictionary guessTable = GetGuessTable(analysis, null); + glosses.Sort((x, y) => ComparePriorityCounts(x, y, occurrence, guessTable[analysis])); return glosses; } + #region GenerateEntryGuesses /// /// This class stores the relevant database ids for information which can generate a diff --git a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs index 30be4d85..ab47d58d 100644 --- a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs +++ b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs @@ -4,8 +4,6 @@ using System; using System.Collections.Generic; -using System.Diagnostics; -using System.IO; using System.Linq; using NUnit.Framework; using SIL.LCModel.Core.Text; @@ -554,6 +552,7 @@ public void ExpectedGuessForWord_GuessUpperAndLowerGenerated() // Test GetOriginalCaseWordform. // Set the analysis to the lowercase wordform. setup.Para0.SetAnalysis(0, 0, sorted_analyses[1]); + setup.GuessServices.ClearGuessData(); sorted_analyses = setup.GuessServices.GetSortedAnalysisGuesses(occurrence.Analysis.Wordform, occurrence); // We should still get the uppercase wordform as a guess. // The lowercase wordform is preferred because it is human-approved. From 30ed21d8c9673a710763ff0021d08cc54a197f37 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Wed, 16 Oct 2024 11:37:15 -0700 Subject: [PATCH 4/9] Add GetContextScore for both previous and following word --- .../DomainServices/AnalysisGuessServices.cs | 48 +++++++++++++++---- .../AnalysisGuessServicesTests.cs | 32 ++++++++++--- 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs index dd86a794..6e1724f6 100644 --- a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs +++ b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs @@ -569,18 +569,19 @@ private void AddAnalysisCount(IAnalysis context, IAnalysis analysis, int priorit /// private int ComparePriorityCounts(IAnalysis a1, IAnalysis a2, AnalysisOccurrence occurrence, ContextCount contextCount) { - IAnalysis previous = occurrence != null ? GetPreviousWordform(occurrence.Segment, occurrence.Index) : m_nullWAG; - IDictionary> counts = contextCount.previousWordform; - // Check for existence of previous. - if (!counts.ContainsKey(previous)) + // Compare contexted counts. + if (occurrence != null) { - previous = m_nullWAG; - if (!counts.ContainsKey(previous)) - return 0; + float score1 = GetContextScore(a1, occurrence, contextCount); + float score2 = GetContextScore(a2, occurrence, contextCount); + if (score1 < score2) + return 1; + if (score1 > score2) + return -1; } - // See if we should back off. - if (!counts[previous].ContainsKey(a1) && !counts[previous].ContainsKey(a2)) - previous = m_nullWAG; + // Compare non-contexted counts. + IAnalysis previous = m_nullWAG; + IDictionary> counts = contextCount.previousWordform; // Prefer higher priority counts. int priority1 = counts[previous].ContainsKey(a1) ? counts[previous][a1].priority : 0; int priority2 = counts[previous].ContainsKey(a2) ? counts[previous][a2].priority : 0; @@ -605,6 +606,33 @@ private int ComparePriorityCounts(IAnalysis a1, IAnalysis a2, AnalysisOccurrence // Maintain a complete order to avoid non-determinism. // This means that GetBestGuess and GetSortedAnalyses[0] should have the same analysis. return a1.Guid.CompareTo(a2.Guid); + + } + + float GetContextScore(IAnalysis analysis, AnalysisOccurrence occurrence, ContextCount contextCount) + { + float previousScore = GetContextScore(analysis, occurrence, true, contextCount); + float nextScore = GetContextScore(analysis, occurrence, false, contextCount); + return previousScore + nextScore; + } + + float GetContextScore(IAnalysis analysis, AnalysisOccurrence occurrence, bool previous, ContextCount contextCount) + { + IAnalysis context = GetAdjacentWordform(occurrence.Segment, occurrence.Index, previous); + IDictionary < IAnalysis, Dictionary < IAnalysis, PriorityCount >> counts = previous + ? contextCount.previousWordform + : contextCount.nextWordform; + if (counts.ContainsKey(context) && + counts[context].ContainsKey(analysis)) + { + float count = counts[context][analysis].count; + float total = 0; + foreach (IAnalysis anal in counts[context].Keys) + total += counts[context][anal].count; + if (total > 0) + return count / total; + } + return 0; } /// diff --git a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs index ab47d58d..5efade09 100644 --- a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs +++ b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs @@ -1445,16 +1445,36 @@ public void ExpectedContextAwareGloss_PreferTwoContextedOverOneContexted() } } - [Test] - public void TestPrioritizerProject() + public void TestPrioritionProject() { TestProject( "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization", - "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization\\Test prioritization.fwdata" + "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization\\Test prioritization.fwdata", + 30, 52 + ); + } + + [Test] + public void TestPrioritionApprovedProject() + { + TestProject( + "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization-Approved", + "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization-Approved\\Test prioritization-Approved.fwdata", + 37, 47 + ); + } + + [Test] + public void TestPrioritionUnapprovedProject() + { + TestProject( + "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization-Unapproved", + "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization-Unapproved\\Test prioritization-Unapproved.fwdata", + 18, 22 ); } - private void TestProject(string projectsDirectory, string dbFileName) + private void TestProject(string projectsDirectory, string dbFileName, int expectedCorrect, int expectedTotal) { var projectId = new TestProjectId(BackendProviderType.kXML, dbFileName); var m_ui = new DummyLcmUI(); @@ -1493,8 +1513,8 @@ private void TestProject(string projectsDirectory, string dbFileName) } float ratio = total == 0 ? 0 : (float)correct / (float)total; Console.WriteLine("correct: " + correct.ToString() + ", total: " + total.ToString() + " (" + (100 * ratio).ToString() + "%)"); - Assert.AreEqual(52, total); - Assert.AreEqual(26, correct); + Assert.AreEqual(expectedCorrect, correct); + Assert.AreEqual(expectedTotal, total); } } } From dc2cb73933390af72067782726ae47929e2c6e0c Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Fri, 18 Oct 2024 07:48:48 -0700 Subject: [PATCH 5/9] Add TestProjects --- .../AnalysisGuessServicesTests.cs | 63 ++++++++++--------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs index 5efade09..032ea57a 100644 --- a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs +++ b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; +using System.IO; using System.Linq; using NUnit.Framework; using SIL.LCModel.Core.Text; @@ -1445,42 +1446,41 @@ public void ExpectedContextAwareGloss_PreferTwoContextedOverOneContexted() } } - public void TestPrioritionProject() - { - TestProject( - "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization", - "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization\\Test prioritization.fwdata", - 30, 52 - ); - } - [Test] - public void TestPrioritionApprovedProject() + public void TestProjects() { - TestProject( - "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization-Approved", - "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization-Approved\\Test prioritization-Approved.fwdata", - 37, 47 - ); + TestProjects("C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects"); } - [Test] - public void TestPrioritionUnapprovedProject() + private void TestProjects(string directory) { - TestProject( - "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization-Unapproved", - "C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects\\Test prioritization-Unapproved\\Test prioritization-Unapproved.fwdata", - 18, 22 - ); + float count = 0; + int correct = 0; + int total = 0; + foreach (string subdir in Directory.GetDirectories(directory)) + { + foreach (string file in Directory.GetFiles(subdir, "*.fwdata")) + { + int pCorrect; + int pTotal; + TestProject(subdir, file, out pCorrect, out pTotal); + if (pTotal == 0) continue; + correct += pCorrect; + total += pTotal; + count++; + } + } + float ratio = (float)correct / (float)total; + Console.WriteLine("overall correct: " + correct.ToString() + ", total: " + total.ToString() + " (" + (100 * ratio).ToString() + "%) for " + count + " projects"); } - private void TestProject(string projectsDirectory, string dbFileName, int expectedCorrect, int expectedTotal) + private void TestProject(string projectsDirectory, string dbFileName, out int outCorrect, out int outTotal) { + int correct = 0; + int total = 0; var projectId = new TestProjectId(BackendProviderType.kXML, dbFileName); var m_ui = new DummyLcmUI(); var m_lcmDirectories = new TestLcmDirectories(projectsDirectory); - int total = 0; - int correct = 0; using (var cache = LcmCache.CreateCacheFromExistingData(projectId, "en", m_ui, m_lcmDirectories, new LcmSettings(), new DummyProgressDlg())) { @@ -1488,10 +1488,13 @@ private void TestProject(string projectsDirectory, string dbFileName, int expect IStTextRepository textRepository = cache.ServiceLocator.GetInstance(); foreach (IStText text in textRepository.AllInstances()) { + if (total == 100) break; foreach (IStTxtPara para in text.ParagraphsOS) { + if (total == 100) break; foreach (var occurrence in SegmentServices.StTextAnnotationNavigator.GetWordformOccurrencesAdvancingInPara(para)) { + if (total == 100) break; var analysis = occurrence.Analysis; if (analysis is IWfiGloss) { @@ -1502,19 +1505,21 @@ private void TestProject(string projectsDirectory, string dbFileName, int expect occurrence.Analysis = analysis.Wordform; var bestGuess = guesser.GetBestGuess(occurrence); occurrence.Analysis = analysis; - total++; if (bestGuess == analysis) correct++; + total++; }); } } } } } + outCorrect = correct; + outTotal = total; + if (total < 5) return; float ratio = total == 0 ? 0 : (float)correct / (float)total; - Console.WriteLine("correct: " + correct.ToString() + ", total: " + total.ToString() + " (" + (100 * ratio).ToString() + "%)"); - Assert.AreEqual(expectedCorrect, correct); - Assert.AreEqual(expectedTotal, total); + string name = dbFileName.Substring(projectsDirectory.Length + 1); + Console.WriteLine("correct: " + correct.ToString() + ", total: " + total.ToString() + " (" + (100 * ratio).ToString() + "%): " + name); } } } From caf0e80183cf93e6941248ccc4d3e80957043c7c Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Fri, 18 Oct 2024 08:48:36 -0700 Subject: [PATCH 6/9] Add min and cutoff to TestProject --- .../DomainServices/AnalysisGuessServicesTests.cs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs index 032ea57a..fd4d52d6 100644 --- a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs +++ b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs @@ -1463,8 +1463,10 @@ private void TestProjects(string directory) { int pCorrect; int pTotal; - TestProject(subdir, file, out pCorrect, out pTotal); - if (pTotal == 0) continue; + int min = 5; + int cutoff = 100; + TestProject(subdir, file, min, cutoff, out pCorrect, out pTotal); + if (pTotal < min) continue; correct += pCorrect; total += pTotal; count++; @@ -1474,7 +1476,7 @@ private void TestProjects(string directory) Console.WriteLine("overall correct: " + correct.ToString() + ", total: " + total.ToString() + " (" + (100 * ratio).ToString() + "%) for " + count + " projects"); } - private void TestProject(string projectsDirectory, string dbFileName, out int outCorrect, out int outTotal) + private void TestProject(string projectsDirectory, string dbFileName, int min, int cutoff, out int outCorrect, out int outTotal) { int correct = 0; int total = 0; @@ -1488,13 +1490,13 @@ private void TestProject(string projectsDirectory, string dbFileName, out int ou IStTextRepository textRepository = cache.ServiceLocator.GetInstance(); foreach (IStText text in textRepository.AllInstances()) { - if (total == 100) break; + if (total == cutoff) break; foreach (IStTxtPara para in text.ParagraphsOS) { - if (total == 100) break; + if (total == cutoff) break; foreach (var occurrence in SegmentServices.StTextAnnotationNavigator.GetWordformOccurrencesAdvancingInPara(para)) { - if (total == 100) break; + if (total == cutoff) break; var analysis = occurrence.Analysis; if (analysis is IWfiGloss) { @@ -1516,7 +1518,7 @@ private void TestProject(string projectsDirectory, string dbFileName, out int ou } outCorrect = correct; outTotal = total; - if (total < 5) return; + if (total < min) return; float ratio = total == 0 ? 0 : (float)correct / (float)total; string name = dbFileName.Substring(projectsDirectory.Length + 1); Console.WriteLine("correct: " + correct.ToString() + ", total: " + total.ToString() + " (" + (100 * ratio).ToString() + "%): " + name); From a408509028cacbe17bbb817d9a0930c902fc6c24 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Wed, 23 Oct 2024 09:39:56 -0700 Subject: [PATCH 7/9] Remove local TestProjects test --- .../AnalysisGuessServicesTests.cs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs index fd4d52d6..5d8e7fe1 100644 --- a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs +++ b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs @@ -1446,12 +1446,10 @@ public void ExpectedContextAwareGloss_PreferTwoContextedOverOneContexted() } } - [Test] - public void TestProjects() - { - TestProjects("C:\\Users\\PC\\source\\repos\\FieldWorks\\DistFiles\\Projects"); - } - + /// + /// Test all of the projects in a directory. + /// + /// private void TestProjects(string directory) { float count = 0; @@ -1476,6 +1474,15 @@ private void TestProjects(string directory) Console.WriteLine("overall correct: " + correct.ToString() + ", total: " + total.ToString() + " (" + (100 * ratio).ToString() + "%) for " + count + " projects"); } + /// + /// Test a project. + /// + /// + /// + /// Skip project if it has less than min approved wordforms. + /// Number of approved wordforms to test + /// Number of correct guesses + /// Number of total guesses private void TestProject(string projectsDirectory, string dbFileName, int min, int cutoff, out int outCorrect, out int outTotal) { int correct = 0; From ac355148ed3a50886085b6398760aecc074bc681 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Wed, 23 Oct 2024 11:40:55 -0700 Subject: [PATCH 8/9] Add regression test --- .../AnalysisGuessServicesTests.cs | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs index 5d8e7fe1..a14b308a 100644 --- a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs +++ b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs @@ -153,6 +153,23 @@ internal void DoDataSetup() bldr3.AppendTsString(TsStringUtils.MakeString( " " + Words_para0[19].Form.BestVernacularAlternative.Text + ".", wsVern)); Para0.Contents = bldr3.GetString(); + /* a c a a c a a c b b c b */ + var bldr4 = Para0.Contents.GetIncBldr(); + bldr4.AppendTsString(TsStringUtils.MakeString( + " " + Words_para0[1].Form.BestVernacularAlternative.Text + + " " + Words_para0[6].Form.BestVernacularAlternative.Text + + " " + Words_para0[1].Form.BestVernacularAlternative.Text + + " " + Words_para0[1].Form.BestVernacularAlternative.Text + + " " + Words_para0[6].Form.BestVernacularAlternative.Text + + " " + Words_para0[1].Form.BestVernacularAlternative.Text + + " " + Words_para0[1].Form.BestVernacularAlternative.Text + + " " + Words_para0[6].Form.BestVernacularAlternative.Text + + " " + Words_para0[4].Form.BestVernacularAlternative.Text + + " " + Words_para0[4].Form.BestVernacularAlternative.Text + + " " + Words_para0[6].Form.BestVernacularAlternative.Text + + " " + Words_para0[4].Form.BestVernacularAlternative.Text + + ".", wsVern)); + Para0.Contents = bldr4.GetString(); using (ParagraphParser pp = new ParagraphParser(Cache)) { foreach (IStTxtPara para in StText.ParagraphsOS) @@ -1446,6 +1463,37 @@ public void ExpectedContextAwareGloss_PreferTwoContextedOverOneContexted() } } + /// + /// Prefer glosses that are approved more often in the following context. + /// + [Test] + public void ExpectedContextAwareGloss_PreferFollowingContexted() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var segment = setup.Para0.SegmentsOS[4]; + var servLoc = segment.Cache.ServiceLocator; + var glossFactory = servLoc.GetInstance(); + var analysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[1].Wordform).Analysis; + var dAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[4].Wordform).Analysis; + var uncontextedApprovedGloss = glossFactory.Create(); + var contextedApprovedGloss1 = glossFactory.Create(); + var contextedApprovedGloss2 = glossFactory.Create(); + analysis.MeaningsOC.Add(uncontextedApprovedGloss); + analysis.MeaningsOC.Add(contextedApprovedGloss1); + analysis.MeaningsOC.Add(contextedApprovedGloss2); + // Analyses must be set in order. + setup.Para0.SetAnalysis(4, 1, contextedApprovedGloss1); // "a c a" + setup.Para0.SetAnalysis(4, 4, contextedApprovedGloss1); // "a c a" + setup.Para0.SetAnalysis(4, 7, contextedApprovedGloss2); // "a c b" + AnalysisOccurrence occurrence = new AnalysisOccurrence(segment, 10); // "b c b" + // Check guess. + var guessActual = setup.GuessServices.GetBestGuess(occurrence); + // Prefer contextedApprovedGloss2 because it is followed by "b". + Assert.AreEqual(contextedApprovedGloss2, guessActual); + } + } + /// /// Test all of the projects in a directory. /// From 82dfc92bfb8cac8aa60f23a796a10c434ddbc6c1 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Thu, 24 Oct 2024 08:29:10 -0700 Subject: [PATCH 9/9] Move uncontexted counts from previousWordform to wordform --- .../DomainServices/AnalysisGuessServices.cs | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs index 6e1724f6..b1e2b995 100644 --- a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs +++ b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs @@ -76,6 +76,7 @@ class ContextCount // The PriorityCount is the count for the analysis. public IDictionary> previousWordform; public IDictionary> nextWordform; + public IDictionary wordform; } // Key of m_guessTable = word form (or analysis). @@ -329,6 +330,16 @@ private IDictionary GetGuessTable(IAnalysis form, IWfiW guessTable[form] = GetContextCounts(form); if (lowercaseForm != null) GetContextCounts(lowercaseForm, true, guessTable[form]); + // Move uncontexted counts from previousWordform to wordform. + if (guessTable[form].previousWordform.ContainsKey(m_nullWAG)) + { + guessTable[form].wordform = guessTable[form].previousWordform[m_nullWAG]; + guessTable[form].previousWordform.Remove(m_nullWAG); + guessTable[form].nextWordform.Remove(m_nullWAG); + } else + { + guessTable[form].wordform = new Dictionary(); + } } return guessTable; } @@ -342,12 +353,7 @@ private IDictionary GetGuessTable(IAnalysis form, IWfiW private IAnalysis GetBestAnalysis(ContextCount counts, AnalysisOccurrence occurrence) { IAnalysis best = null; - if (!counts.previousWordform.ContainsKey(m_nullWAG)) - { - // No analyses to enumerate. - return null; - } - foreach (IAnalysis key in counts.previousWordform[m_nullWAG].Keys) + foreach (IAnalysis key in counts.wordform.Keys) { if (best == null || ComparePriorityCounts(key, best, occurrence, counts) < 0) { @@ -580,25 +586,24 @@ private int ComparePriorityCounts(IAnalysis a1, IAnalysis a2, AnalysisOccurrence return -1; } // Compare non-contexted counts. - IAnalysis previous = m_nullWAG; - IDictionary> counts = contextCount.previousWordform; + IDictionary counts = contextCount.wordform; // Prefer higher priority counts. - int priority1 = counts[previous].ContainsKey(a1) ? counts[previous][a1].priority : 0; - int priority2 = counts[previous].ContainsKey(a2) ? counts[previous][a2].priority : 0; + int priority1 = counts.ContainsKey(a1) ? counts[a1].priority : 0; + int priority2 = counts.ContainsKey(a2) ? counts[a2].priority : 0; if (priority1 < priority2) return 1; if (priority1 > priority2) return -1; // Prefer higher counts. - int count1 = counts[previous].ContainsKey(a1) ? counts[previous][a1].count : 0; - int count2 = counts[previous].ContainsKey(a2) ? counts[previous][a2].count : 0; + int count1 = counts.ContainsKey(a1) ? counts[a1].count : 0; + int count2 = counts.ContainsKey(a2) ? counts[a2].count : 0; if (count1 < count2) return 1; if (count1 > count2) return -1; // Prefer analyses that haven't been lowercased. - bool lowercased1 = counts[previous].ContainsKey(a1) && counts[previous][a1].lowercased; - bool lowercased2 = counts[previous].ContainsKey(a2) && counts[previous][a2].lowercased; + bool lowercased1 = counts.ContainsKey(a1) && counts[a1].lowercased; + bool lowercased2 = counts.ContainsKey(a2) && counts[a2].lowercased; if (lowercased1 && !lowercased2) return 1; if (lowercased2 && !lowercased1)