From e382ab4773d7d811a9a417cfb7773f5ce4ce06d4 Mon Sep 17 00:00:00 2001 From: "Lasse R.H. Nielsen" Date: Mon, 11 Nov 2024 18:21:50 +0100 Subject: [PATCH] Turned all look-ahead into tables. Add direct test for `isGrahphemeClusterBoundary`. --- .../lib/src/grapheme_clusters/breaks.dart | 304 +++++++----------- .../lib/src/grapheme_clusters/constants.dart | 56 +++- .../lib/src/grapheme_clusters/table.dart | 7 +- pkgs/characters/test/breaks_test.dart | 145 +++++++-- .../tool/src/automaton_builder.dart | 278 +++++++++++----- pkgs/characters/tool/src/debug_names.dart | 28 +- 6 files changed, 521 insertions(+), 297 deletions(-) diff --git a/pkgs/characters/lib/src/grapheme_clusters/breaks.dart b/pkgs/characters/lib/src/grapheme_clusters/breaks.dart index 689c2fd7..99adf215 100644 --- a/pkgs/characters/lib/src/grapheme_clusters/breaks.dart +++ b/pkgs/characters/lib/src/grapheme_clusters/breaks.dart @@ -394,9 +394,48 @@ bool isGraphemeClusterBoundary(String text, int start, int end, int index) { // The backwards automaton is built for this use case. // Most of the apparent complication in this function is merely dealing with // surrogates. - if (start <= index && index < end) { - var next = nextBreak(text, start, end, index); - return next == index; + if (start < index && index < end) { + int prevCategory, nextCategory; + var cursorBefore = index - 1; + var prevChar = text.codeUnitAt(cursorBefore); + var nextChar = text.codeUnitAt(index); + if (prevChar & 0xF800 != 0xD800) { + prevCategory = low(prevChar); + } else if (prevChar & 0xFC00 == 0xD800) { + // Either not a break because it's in the middle of a surrogate pair, + // or always a break after an unpaired surrogate. + return nextChar & 0xFC00 != 0xDC00; + } else if (start < cursorBefore) { + assert(prevChar & 0xFC00 == 0xDC00); + var headChar = text.codeUnitAt(--cursorBefore); + if (headChar & 0xFC00 != 0xD800) { + // Always break after unpaired tail surrogate. 
+ return true; + } + prevCategory = high(headChar, prevChar); + } else { + // Break after unpaired tail surrogate. + return true; + } + if (nextChar & 0xF800 != 0xD800) { + nextCategory = low(nextChar); + } else if (nextChar & 0xFC00 == 0xD800 && index + 1 < end) { + var tailChar = text.codeUnitAt(index + 1); + if (tailChar & 0xFC00 != 0xDC00) { + // Always break before unpaired head surrogate. + return true; + } + nextCategory = high(nextChar, tailChar); + } else { + // Always break before unpaired tail surrogate. + return true; + } + var state = move(move(stateCAny, prevCategory), nextCategory); + if (state & maskBreak != flagNoBreak) { + return true; + } + if (state & maskLookahead == 0) return false; + return _lookaheadSimple(text, start, cursorBefore, state); } return true; } @@ -479,207 +518,88 @@ int nextBreak(String text, int start, int end, int index) { // The `prevCategory` now is the category for the character // from `indexBefore` to `index`, and `index` is the minium valid // return value (earliest next break). - - var state = stateSoTNoBreak; - // In many cases, the state at `index` can be predicted precisely - // from just the one prior character. That's the case for every - // input category where the output state doesn't depend on the input - // state (ignoring whether it breaks before the previous character or not). - // - // The cases where that is not possible are: - // - A regional indicator (need to know if there is an even or odd number - // of regional indicators before that). - // - An Extend{InCB=None|Extend|Linked} or ZWJ - // - For Extends{...} and ZWJ, it behaves differently if in state Pic - // (after Pictographic+Extend*). - // - For {InCB=Extend|Linked} or ZWJ (which is InCB=Extend), - // it behaves differently if in state InC or InCL, so after - // Other(InCB=Consonant)+(InCB={Extend|Linked})* - // In those cases, check the next character first. 
It may make the look-behind - // unnecessary, if it's a character that guarantees a break. - // Otherwise look-behind to see if the prior characters are `Pic+Extend` - // or `InCB=Consonant+InCB={Extend+Linked}` (and whether at least one Linked). - if (prevCategory == categoryRegionalIndicator || - prevCategory == categoryExtend || - prevCategory >= categoryZWJ) { - // >= ZWJ implies ZWJ|Extend(InCB={Extend|Linked}) for an input character. - // Only higher categories are synthetic EoT/SoT characters. - - // TODO: Can this be made into an automaton? - - var indexAfter = index + 1; + var state = move(stateCAny, prevCategory); + while (index < end) { var nextChar = text.codeUnitAt(index); - int nextCategory; - if (nextChar & 0xFC00 != 0xD800) { - nextCategory = low(nextChar); - } else if (indexAfter < end) { - // Lead surrogate. - var tailChar = text.codeUnitAt(indexAfter); - if (tailChar & 0xFC00 == 0xDC00) { - indexAfter += 1; - nextCategory = high(nextChar, tailChar); - } else { - return index; // Unpaired surrogate + var nextIndex = index + 1; + int category; + if (nextChar & 0xFC00 != 0xD800 || nextIndex == end) { + category = low(nextChar); + state = move(state, category); + if (state & maskFlags == flagNoBreak) { + index++; + continue; } } else { - return index; // Unpaired surrogate, treat as control. - } - - // The `nextCategory is the category of the character at positions - // from `index` to `indexAfter`. - - if (prevCategory == categoryRegionalIndicator) { - if (nextCategory == categoryRegionalIndicator) { - // Prev = RI, next = RI. - var idStateBefore = lookaheadRegional(text, start, indexBefore); - if (idStateBefore & maskBreak != flagNoBreak) { - // Break after previous character. - return index; - } - state = stateOther; - // Move index to after RI+RI. 
- index = indexAfter; - } else { - state = move(stateOther, nextCategory); - if (state & maskBreak != flagNoBreak) return index; - index = indexAfter; - } - } else if (prevCategory == categoryZWJ) { - if (nextCategory == categoryPictographic) { - var prevPic = lookaheadPictographicExtend(text, start, indexBefore); - if (prevPic < 0) { - return index; - } - state = statePictographic; - } else { - // Default for ZWJ if not after Pic+Ext* or - // InCB=Consonant+InCB={Extend|Linked}* - state = stateOther; - if (nextCategory >= categoryOtherIndicConsonant) { - var prevConsonant = lookaheadInCBLinkedConsonant( - text, start, indexBefore, prevCategory); - if (prevConsonant >= 0) { - state = prevConsonant.isOdd ? stateInCL : stateInC; - } - } - state = move(state, nextCategory); - if (state & maskBreak != flagNoBreak) return index; - } - } else if (prevCategory == categoryExtend || - nextCategory == categoryExtend) { - // The `categoryExtend` has InCB=None, so not affected by GB9c, only GB9b. - - // At this point `prevCategory` is definitely an Extend. - // Do Pictographic lookbehind if `nextCategory` is any Extend or ZWJ. - assert(prevCategory == categoryExtend || - prevCategory == categoryExtendIndicExtend || - prevCategory == categoryExtendIndicLinked); - state = stateOther; - if (nextCategory == categoryExtend || nextCategory >= categoryZWJ) { - // Look behind for Pic+Ext*. 
- var prevPic = lookaheadPictographicExtend(text, start, indexBefore); - if (prevPic >= 0) { - state = statePictographic; - } - } - state = move(state, nextCategory); - if (state & maskBreak != flagNoBreak) return index; - } else if (nextCategory == categoryOtherIndicConsonant) { - assert(prevCategory >= categoryExtendIndicExtend); - var prevConsonant = - lookaheadInCBLinkedConsonant(text, start, indexBefore, prevCategory); - if (prevConsonant.isEven) { - return index; + var tail = text.codeUnitAt(nextIndex); + category = categoryControl; + if (tail & 0xFC00 == 0xDC00) { + nextIndex += 1; + category = high(nextChar, tail); } - state = stateInC | flagNoBreak; - } else if (nextCategory >= categoryZWJ) { - assert(prevCategory >= categoryExtendIndicExtend); - // It's all Extend{InCB!=None}, can't say whether to look for - // Pic or InCB=Consonant. - state = lookaheadPictographicExtendOrIndic( - text, start, indexBefore, prevCategory, nextCategory); - if (state & maskBreak != flagNoBreak) { - return index; + state = move(state, category); + if (state & maskFlags == flagNoBreak) { + index = nextIndex; + continue; } + } + if (state & maskFlags == flagBreak) return index; + assert(state & maskFlags == flagLookahead); + + if (_lookaheadSimple(text, start, indexBefore, state)) return index; + + // Find the correct forward category. + // There are only three possible character categories that can trigger + // a look-behind. + if (category == categoryRegionalIndicator) { + assert(state == stateLookaheadRegionalEven | flagLookahead); + // Started by RI+RI. + state = stateRegionalEven; + } else if (category == categoryOtherIndicConsonant) { + assert( + state == (stateLookaheadInC | flagLookahead) || + state == (stateLookaheadInCL | flagLookahead), + state); + state = stateInC; } else { - // Doesn't need further lookahead, one character is enough. 
- state = move(stateSoTNoBreak, prevCategory); - state = move(state, nextCategory); - if (state & maskBreak != flagNoBreak) return index; + assert(category == categoryPictographic); + assert(state == (stateLookaheadZWJPictographic | flagLookahead)); + state = statePictographic; } - index = indexAfter; - } else { - // Just look at one prior character. - state = move(stateSoTNoBreak, prevCategory); + index = nextIndex; } - // Break wasn't at index, so move forward until finding the break. - return Breaks(text, index, text.length, state).nextBreak(); + assert(index == end); + return index; } -// Look behind for a Pic+Ext+ZWJ? or Consonant+(Extend|Linked)* sequence. -// Given the last two categories. -// -// The categories are ones that allow both prefixes, so -// [category1] is either [categoryExtendInCBExtend] or -// [categoryExtendInCBLinked], and [category2] is one of those or [categoryZWJ]. -// -// Returns the state after `category2`, with the break flag reporting -// whether to break before `category2` or not. -int lookaheadPictographicExtendOrIndic( - String text, int start, int cursor, int category1, int category2) { - assert( - category1 == categoryExtendIndicExtend || - category1 == categoryExtendIndicLinked, - category1); - assert( - category2 == categoryZWJ || - category2 == categoryExtendIndicExtend || - category2 == categoryExtendIndicLinked, - category2); - var linked = (category1 == categoryExtendIndicLinked || - category2 == categoryExtendIndicLinked) - ? (stateInCL | flagNoBreak) - : (stateInC | flagNoBreak); - loop: +/// Whether to break before a later character. +/// +/// Used only to find grapheme category breaks, not part of moving forwards +/// or backwards from known breaks. +/// +/// That character is always one of [categoryOtherIndicConsonant], +/// [categoryPictographic] or [categoryRegionalIndicator], the only +/// characters where knowing whether to break before them depends on +/// more than the single prior character. 
+bool _lookaheadSimple(String text, int start, int cursor, int backState) { while (cursor > start) { - int category; - var char = text.codeUnitAt(--cursor); - if (char & 0xFC00 != 0xDC00) { - category = low(char); + var prevChar = text.codeUnitAt(--cursor); + if (prevChar & 0xFC00 != 0xDC00 || cursor == start) { + backState = moveBack(backState, low(prevChar)); + if (backState >= stateLookaheadMin) continue; } else { - if (cursor <= start) break; - var head = text.codeUnitAt(--cursor); - if (head & 0xFC00 != 0xD800) break; - category = high(head, char); - } - switch (category) { - case categoryExtend: - var prevPic = lookaheadPictographicExtend(text, start, cursor); - if (prevPic < 0) break loop; - continue pictographic; - pictographic: - case categoryPictographic: - return category2 == categoryZWJ - ? (statePictographicZWJ | flagNoBreak) - : (statePictographic | flagNoBreak); - case categoryZWJ: - var prevConsonant = - lookaheadInCBLinkedConsonant(text, start, cursor, category); - if (prevConsonant < 0) break loop; - if (prevConsonant.isOdd) { - return stateInCL | flagNoBreak; - } - return linked; - case categoryOtherIndicConsonant: - return linked; - case categoryExtendIndicLinked: - linked = stateInCL | flagNoBreak; - case categoryExtendIndicExtend: - break; // No change. - default: - break loop; + var headChar = text.codeUnitAt(--cursor); + int category; + if (headChar & 0xFC00 == 0xD800) { + category = high(headChar, prevChar); + } else { + category = categoryControl; + cursor++; + } + backState = moveBack(backState, category); + if (backState >= stateLookaheadMin) continue; } + return (backState & maskBreak != flagNoBreak); } - // Default behavior if no Pic or InCB=Consonant found. 
- return move(move(stateSoTNoBreak, category1), category2); + return moveBack(backState, categorySoT) & maskBreak != flagNoBreak; } diff --git a/pkgs/characters/lib/src/grapheme_clusters/constants.dart b/pkgs/characters/lib/src/grapheme_clusters/constants.dart index 33c0e5c7..492a32bb 100644 --- a/pkgs/characters/lib/src/grapheme_clusters/constants.dart +++ b/pkgs/characters/lib/src/grapheme_clusters/constants.dart @@ -72,7 +72,7 @@ const scaleState = automatonRowLength; // States of forwards automaton --------------------------------------- -// For each state, also have a `scaledState...` for the value of that +// For each state, also have a `scaleState...` for the value of that // state that occurs in the automaton tables (and which is an index // into the automaton tables). @@ -149,10 +149,62 @@ const int stateSoTNoBreak = idStateSoTNoBreak * scaleState; const StateId idStateSoT = 0x0D as StateId; const int stateSoT = idStateSoT * scaleState; +// Context-unaware states in forward automaton. +// States that do not know what's behind the current Ext{InCB=?}+ZWJ +// sequence, and which may need to trigger a look-behind in some cases. + +/// Start of context-unaware lookahead, no characters seen. +const StateId idStateCAny = 0x0E as StateId; +const int stateCAny = idStateCAny * scaleState; + +/// Seen ZWJ only, as the first (prior) character. +const StateId idStateCZWJ = 0x0F as StateId; +const stateCZWJ = idStateCZWJ * scaleState; + +/// Seen Extend{InCB=Extend}+ only. 
+const StateId idStateCIE = 0x10 as StateId; +const stateCIE = idStateCIE * scaleState; + +/// Seen Extend{InCB=Extend|Linked}+, with at least one Linked +const StateId idStateCIL = 0x11 as StateId; +const stateCIL = idStateCIL * scaleState; + +/// Seen Extend{InCB=Extend}+ + ZWJ +const StateId idStateCIEZ = 0x12 as StateId; +const stateCIEZ = idStateCIEZ * scaleState; + +/// Seen Extend{InCB=Extend|Linked}+ + ZWJ with at least one Linked +const StateId idStateCILZ = 0x13 as StateId; +const stateCILZ = idStateCILZ * scaleState; + +/// Seen (Extend{InCB=Extend}|ZWJ)+ with at least one non-trailing ZWJ +const StateId idStateCZIE = 0x14 as StateId; +const stateCZIE = idStateCZIE * scaleState; + +/// Seen (Extend{InCB=Extend|Linked}|ZWJ)+ +/// with at least one non-trailing ZWJ and at least one Linked. +const StateId idStateCZIL = 0x15 as StateId; +const stateCZIL = idStateCZIL * scaleState; + +/// Seen Extend{InCB=?}+ with at least one Extend{InCB=None} +const StateId idStateCExt = 0x16 as StateId; +const stateCExt = idStateCExt * scaleState; + +/// Seen Extend{InCB=?}+ + ZWJ with at least one Extend{InCB=None} +const StateId idStateCExZ = 0x17 as StateId; +const stateCExZ = idStateCExZ * scaleState; + +/// Seen [RegionalIndicator] only. +const StateId idStateCReg = 0x18 as StateId; +const stateCReg = idStateCReg * scaleState; + // -------------------------------------------------------------------- +/// First state which might trigger look-behind. +const StateId idStateMinContextUnaware = idStateCAny; + /// Number of states in forward automaton. -const StateId idStateCount = idStateSoT + 1 as StateId; +const StateId idStateCount = idStateCReg + 1 as StateId; // --------------------------------------------------------------------- // Backwards Automaton extra/alternative states and categories. 
diff --git a/pkgs/characters/lib/src/grapheme_clusters/table.dart b/pkgs/characters/lib/src/grapheme_clusters/table.dart index a573a42b..ed0ae3b8 100644 --- a/pkgs/characters/lib/src/grapheme_clusters/table.dart +++ b/pkgs/characters/lib/src/grapheme_clusters/table.dart @@ -1149,7 +1149,12 @@ const _stateMachine = '\x15\x01)))µ\x8d\x01=QeyeyÉ)))ñð\x15\x01)))µ\x8d\x00 '(µ\x8d\x01=QexeyÉ(((ñð\x15\x01)\x8c(µ\x8d\x01=QeyeyÉ\xa0\x8c\x8cñð\x15\x01' ')((µ\x8c\x01=QeyeyÉ(((ñð\x15\x01)(((\x8d\x01=QeyeyÉ(((ñð\x15\x01)((µ\x8d' '\x01=QeyeyÉÈÈÜñð\x15\x01)((µ\x8d\x01=QeyeyÈÜÜÜñð\x14\x00(((´\x8c\x00 _stateMachine.codeUnitAt((state & -4) + inputCategory); diff --git a/pkgs/characters/test/breaks_test.dart b/pkgs/characters/test/breaks_test.dart index 0fd91808..8553bb4d 100644 --- a/pkgs/characters/test/breaks_test.dart +++ b/pkgs/characters/test/breaks_test.dart @@ -9,6 +9,7 @@ import 'package:characters/src/grapheme_clusters/constants.dart'; import 'package:characters/src/grapheme_clusters/table.dart'; import "package:test/test.dart"; +import '../tool/src/debug_names.dart'; import 'src/equiv.dart'; import "src/unicode_tests.dart"; @@ -56,7 +57,8 @@ void main() { } }); - // Test the [nextBreak] function on all positions of all the Unicode tests. + // Test the top-level [nextBreak] function on all positions of all + // the Unicode tests. group("nextBreak", () { // Should find the next break at any position. for (var (expectedParts, _) in splitTests) { @@ -81,6 +83,61 @@ void main() { } }); + // Test the top-level [previousBreak] function on all positions of all + // the Unicode tests. + group("previousBreak", () { + // Should find the next break at any position. 
+ for (var (expectedParts, _) in splitTests) { + for (var (variantParts, kind) in testVariants(expectedParts)) { + test(testDescription(variantParts) + kind, () { + var input = variantParts.join(""); + var description = partCategories(expectedParts); + var partCursor = 0; + var nextBreak = 0; + var expectedBreak = 0; + + for (var i = 0; i <= input.length; i++) { + if (i == nextBreak) { + expectedBreak = nextBreak; + if (i < input.length) { + nextBreak += variantParts[partCursor++].length; + } + } + var actualBreak = previousBreak(input, 0, input.length, i); + expect(actualBreak, expectedBreak, + reason: "at $i: $description$kind"); + } + }); + } + } + }); + + // Test the top-level [isGraphemeClusterBoundary] function on all positions + // of all the Unicode tests. + group("isGraphemeClusterBreak", () { + // Should report whether each position is a break. + for (var (expectedParts, _) in splitTests) { + for (var (variantParts, kind) in testVariants(expectedParts)) { + test(testDescription(variantParts) + kind, () { + var input = variantParts.join(""); + var description = partCategories(expectedParts); + var partCursor = 0; + var nextBreak = 0; + + for (var i = 0; i <= input.length; i++) { + expect(isGraphemeClusterBoundary(input, 0, input.length, i), + i == nextBreak, + reason: "at $i: $description"); + + if (i == nextBreak && i < input.length) { + nextBreak += variantParts[partCursor++].length; + } + } + }); + } + } + }); + // Check that automatons are minimal. // // * All states are reachable from the start states. @@ -104,35 +161,68 @@ void main() { stateRegionalSingle, stateInC, stateInCL, - // Not expected in output. - // stateSoT, - // Used as filler and as state after EoT. - stateSoTNoBreak, + stateSoT, // Entry point. + stateSoTNoBreak, // Entry point. + stateCAny, // Entry point. + stateCZWJ, + stateCExZ, + stateCIE, + stateCIEZ, + stateCIL, + stateCILZ, + stateCZIE, + stateCZIL, + stateCReg, + stateCExt, }; // Standard reachability algorithm. 
// Fringe of reachable states. Will contain all reachable states once. - var workList = [idStateSoTNoBreak]; + + var entryStates = [stateSoTNoBreak, stateSoT, stateCAny]; + // All reachable state will be removed from this set, // and added to the worklist the first time they are seen. - var unreachableStates = {...states}; + var unreachableStates = {...states}..removeAll(entryStates); + // Start with entry points. + var workList = [...entryStates]; + var nextStepList = []; - var step = 0; + var step = 1; // Continue until all states reachable, or no states left in fringe. - while (workList.isNotEmpty && unreachableStates.isNotEmpty) { - step++; + while ((workList.isNotEmpty || nextStepList.isNotEmpty) && + unreachableStates.isNotEmpty) { + if (workList.isEmpty) { + workList = nextStepList; + nextStepList = []; + step++; + } var state = workList.removeLast(); for (var c = 0; c < categoryCount; c++) { var newState = move(state, c) & maskState; + if (newState & maskFlags == flagLookahead) { + // A lookahead in the forwards automaton uses the + // backwards automaton to determine whether to break. + // It should leave the context-unaware part of the states + // and reach a state that should otherwise be reachable too. + continue; + } // No unexpected output states. expect(states, contains(newState), reason: "($state,$c): Unexpected output state"); // Add to fringe the first time a state is seen. 
if (unreachableStates.remove(newState)) { - workList.add(newState); + nextStepList.add(newState); } } } - expect(unreachableStates, isEmpty, reason: "Should be reachable"); + if (unreachableStates.isNotEmpty) { + expect( + unreachableStates + .map((s) => stateShortNames[s ~/ scaleState]) + .toList(), + isEmpty, + reason: "Should be reachable"); + } print("Forward states reachable in $step steps"); }); @@ -189,7 +279,6 @@ void main() { }); test("States backward reachable", () { - var workList = [stateEoTNoBreak]; var states = { stateBreak, stateLF, @@ -206,7 +295,7 @@ void main() { // -- Only reachable through lookahead. stateRegionalEven, // -- Not reachable, only used as start state. - // stateEoT, + stateEoT, // Used as filler, and state after EoT. stateEoTNoBreak, stateLookaheadZWJPictographic, @@ -215,17 +304,25 @@ void main() { stateLookaheadRegionalEven, stateLookaheadRegionalOdd, }; - var unreachableStates = {...states}; - var step = 0; + var entryStates = [stateEoTNoBreak, stateEoT]; + var unreachableStates = {...states}..removeAll(entryStates); + var workList = [...entryStates]; + var nextStepList = []; + var step = 1; - while (workList.isNotEmpty) { - step += 1; + while ((workList.isNotEmpty || nextStepList.isNotEmpty) && + unreachableStates.isNotEmpty) { + if (workList.isEmpty) { + step++; + workList = nextStepList; + nextStepList = []; + } var state = workList.removeLast(); for (var c = 0; c < categoryCount; c++) { var newState = moveBack(state, c) & maskState; expect(states, contains(newState), reason: "Unexpected output state"); if (unreachableStates.remove(newState)) { - workList.add(newState); + nextStepList.add(newState); } } if (unreachableStates.isEmpty) { @@ -233,8 +330,14 @@ void main() { return; } } - expect(unreachableStates, isEmpty, - reason: "Should be reachable, not reached in $step steps"); + if (unreachableStates.isNotEmpty) { + expect( + unreachableStates + .map((s) => stateShortNames[s ~/ scaleState]) + .toList(), + isEmpty, + reason: 
"Should be reachable, not reached in $step steps"); + } }); test("Backward states distinguishable", () { diff --git a/pkgs/characters/tool/src/automaton_builder.dart b/pkgs/characters/tool/src/automaton_builder.dart index 19fac1bf..3caf15f6 100644 --- a/pkgs/characters/tool/src/automaton_builder.dart +++ b/pkgs/characters/tool/src/automaton_builder.dart @@ -29,39 +29,53 @@ import "string_literal_writer.dart"; // // Stored as string for comparison to actual generated automaton. const expectedAutomatonDescription = r""" -Cat : State - : Brk CR Otr Pre L V T Pic PicZ Reg InC InCL SoTN SoT : ------------------------------------------------------------------------------ -CR : !CR !CR !CR !CR !CR !CR !CR !CR !CR !CR !CR !CR CR !CR : -Ctl : !Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk Brk !Brk : -Otr : !Otr !Otr !Otr Otr !Otr !Otr !Otr !Otr !Otr !Otr !Otr !Otr Otr !Otr : -Ext : !Otr !Otr Otr Otr Otr Otr Otr Pic Otr Otr Otr Otr Otr !Otr : -Spc : !Otr !Otr Otr Otr Otr Otr Otr Otr Otr Otr Otr Otr Otr !Otr : -Reg : !Reg !Reg !Reg Reg !Reg !Reg !Reg !Reg !Reg Otr !Reg !Reg Reg !Reg : -Pic : !Pic !Pic !Pic Pic !Pic !Pic !Pic !Pic Pic !Pic !Pic !Pic Pic !Pic : -LF : !Brk Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk Brk !Brk : -Pre : !Pre !Pre !Pre Pre !Pre !Pre !Pre !Pre !Pre !Pre !Pre !Pre Pre !Pre : -L : !L !L !L L L !L !L !L !L !L !L !L L !L : -V : !V !V !V V V V !V !V !V !V !V !V V !V : -T : !T !T !T T !T T T !T !T !T !T !T T !T : -LV : !V !V !V V V !V !V !V !V !V !V !V V !V : -LVT : !T !T !T T T !T !T !T !T !T !T !T T !T : -OInC: !InC !InC !InC InC !InC !InC !InC !InC !InC !InC !InC InC InC !InC : -ZWJ : !Otr !Otr Otr Otr Otr Otr Otr PicZ Otr Otr InC InCL Otr !Otr : -EInE: !Otr !Otr Otr Otr Otr Otr Otr Pic Otr Otr InC InCL Otr !Otr : -EInL: !Otr !Otr Otr Otr Otr Otr Otr Pic Otr Otr InCL InCL Otr !Otr : -EoT : ! - ! - ! - ! - ! - ! - ! - ! - ! - ! - ! - ! 
- - - : +Stat: Cat + : CR Ctl Otr Ext Spc Reg Pic LF Pre L V T LV LVT OInC ZWJ EInE EInL EoT : +----------------------------------------------------------------------------------------------------- +Brk :!CR !Brk !Otr !Otr !Otr !Reg !Pic !Brk !Pre !L !V !T !V !T !InC !Otr !Otr !Otr ! - : +CR :!CR !Brk !Otr !Otr !Otr !Reg !Pic Brk !Pre !L !V !T !V !T !InC !Otr !Otr !Otr ! - : +Otr :!CR !Brk !Otr Otr Otr !Reg !Pic !Brk !Pre !L !V !T !V !T !InC Otr Otr Otr ! - : +Pre :!CR !Brk Otr Otr Otr Reg Pic !Brk Pre L V T V T InC Otr Otr Otr ! - : +L :!CR !Brk !Otr Otr Otr !Reg !Pic !Brk !Pre L V !T V T !InC Otr Otr Otr ! - : +V :!CR !Brk !Otr Otr Otr !Reg !Pic !Brk !Pre !L V T !V !T !InC Otr Otr Otr ! - : +T :!CR !Brk !Otr Otr Otr !Reg !Pic !Brk !Pre !L !V T !V !T !InC Otr Otr Otr ! - : +Pic :!CR !Brk !Otr Pic Otr !Reg !Pic !Brk !Pre !L !V !T !V !T !InC PicZ Pic Pic ! - : +PicZ:!CR !Brk !Otr Otr Otr !Reg Pic !Brk !Pre !L !V !T !V !T !InC Otr Otr Otr ! - : +Reg :!CR !Brk !Otr Otr Otr Otr !Pic !Brk !Pre !L !V !T !V !T !InC Otr Otr Otr ! - : +InC :!CR !Brk !Otr Otr Otr !Reg !Pic !Brk !Pre !L !V !T !V !T !InC InC InC InCL! - : +InCL:!CR !Brk !Otr Otr Otr !Reg !Pic !Brk !Pre !L !V !T !V !T InC InCL InCL InCL! - : +SoTN: CR Brk Otr Otr Otr Reg Pic Brk Pre L V T V T InC Otr Otr Otr - : +SoT :!CR !Brk !Otr !Otr !Otr !Reg !Pic !Brk !Pre !L !V !T !V !T !InC !Otr !Otr !Otr - : +CAny:!CR !Brk Otr CExt Otr CReg!Pic !Brk Pre L V T V T InC CZWJ CIE CIL - : +CZWJ:!CR !Brk !Otr Otr Otr !Reg $LAZP!Brk !Pre !L !V !T !V !T $LAIC CZIE CZIE CZIL! - : +CIE :!CR !Brk !Otr CExt Otr !Reg !Pic !Brk !Pre !L !V !T !V !T $LAIC CIEZ CIE CIL ! - : +CIL :!CR !Brk !Otr CExt Otr !Reg !Pic !Brk !Pre !L !V !T !V !T $LAIL CILZ CIL CIL ! - : +CIEZ:!CR !Brk !Otr Otr Otr !Reg $LAZP!Brk !Pre !L !V !T !V !T !InC CZIE CZIE CZIL! - : +CILZ:!CR !Brk !Otr Otr Otr !Reg $LAZP!Brk !Pre !L !V !T !V !T $LAIL CZIL CZIL CZIL! - : +CZIE:!CR !Brk !Otr Otr Otr !Reg !Pic !Brk !Pre !L !V !T !V !T $LAIC CZIE CZIE CZIL! 
- : +CZIL:!CR !Brk !Otr Otr Otr !Reg !Pic !Brk !Pre !L !V !T !V !T $LAIL CZIL CZIL CZIL! - : +CExt:!CR !Brk !Otr CExt Otr !Reg !Pic !Brk !Pre !L !V !T !V !T !InC CExZ CExt CExt! - : +CExZ:!CR !Brk !Otr Otr Otr !Reg $LAZP!Brk !Pre !L !V !T !V !T !InC Otr Otr Otr ! - : +CReg:!CR !Brk !Otr Otr Otr $LARe!Pic !Brk !Pre !L !V !T !V !T !InC Otr Otr Otr ! - : """; void writeForwardAutomaton(StringSink buffer, {required bool verbose}) { assert(categories.length == categoryCount); assert(automatonRowLength.isEven && automatonRowLength >= categoryCount); var table = Uint16List(idStateCount * automatonRowLength); + void transitionLA( + StateId stateId, int category, StateId newStateId, int flags) { + assert(flags <= maskFlags); + assert(flags & flagLookahead == 0 || newStateId >= idStateLookaheadMin); + table[stateId * automatonRowLength + category] = + (newStateId * scaleState) + flags; + } + void transition( StateId stateId, int category, StateId newStateId, bool breakBefore) { assert(newStateId < idStateCount, "$stateId + $category -> $newStateId"); - table[stateId * automatonRowLength + category] = - (newStateId * scaleState) | (breakBefore ? flagBreak : flagNoBreak); + transitionLA( + stateId, category, newStateId, breakBefore ? flagBreak : flagNoBreak); } for (var state = 0 as StateId; @@ -74,8 +88,9 @@ void writeForwardAutomaton(StringSink buffer, {required bool verbose}) { // States that should never be broken after, unless `alwaysBreakBefore` // says otherwise (for example the rules in GB1..GB5). - var neverBreakBefore = - state == idStateSoTNoBreak || state == idStatePrepend; + var neverBreakBefore = state == idStateSoTNoBreak || + state == idStateCAny || // Break in this state never matters. + state == idStatePrepend; // Other with InCB=None. // No rules apply specifically to Other, so break unless an @@ -85,9 +100,19 @@ void writeForwardAutomaton(StringSink buffer, {required bool verbose}) { // GB9C. 
(Break unless Any rule applies, or preceded by indic sequence // with at least one Linked, `idStateInCL`). // Remember having seen InCB=Consonant and no InCB=Linked yet. - transition(state, categoryOtherIndicConsonant, idStateInC, - !(neverBreakBefore || state == idStateInCL)); + if (state == idStateCZWJ || state == idStateCIE || state == idStateCZIE) { + transitionLA(state, categoryOtherIndicConsonant, idStateLookaheadInC, + flagLookahead); + } else if (state == idStateCIL || + state == idStateCILZ || + state == idStateCZIL) { + transitionLA(state, categoryOtherIndicConsonant, idStateLookaheadInCL, + flagLookahead); + } else { + transition(state, categoryOtherIndicConsonant, idStateInC, + !(neverBreakBefore || state == idStateInCL || state == idStateCAny)); + } // CR. // GB4 + GB5. Always break, after unless followed by LF, so remember // having seen CR (`idStateCR`). @@ -128,7 +153,7 @@ void writeForwardAutomaton(StringSink buffer, {required bool verbose}) { transition(state, categoryZWJ, state, false); // Extend with InCB=Linked. transition(state, categoryExtendIndicLinked, idStateInCL, false); - } else { + } else if (state < idStateMinContextUnaware || state == idStateCReg) { // GB9 alone. // No special rules for breaking after, // break before only if required by GB1-GB5. 
@@ -138,6 +163,65 @@ void writeForwardAutomaton(StringSink buffer, {required bool verbose}) { transition( state, categoryExtendIndicLinked, idStateOther, alwaysBreakBefore); transition(state, categoryZWJ, idStateOther, alwaysBreakBefore); + } else { + transition( + state, + categoryZWJ, + switch (state) { + idStateCAny => idStateCZWJ, + idStateCZWJ => idStateCZIE, + idStateCIE => idStateCIEZ, + idStateCIL => idStateCILZ, + idStateCIEZ => idStateCZIE, + idStateCILZ => idStateCZIL, + idStateCZIE => idStateCZIE, + idStateCZIL => idStateCZIL, + idStateCExt => idStateCExZ, + _ => idStateOther, + }, + false); + transition( + state, + categoryExtend, + (state == idStateCAny || + state == idStateCIE || + state == idStateCIL || + state == idStateCExt) + ? idStateCExt + : idStateOther, + false); + transition( + state, + categoryExtendIndicExtend, + switch (state) { + idStateCAny => idStateCIE, + idStateCZWJ => idStateCZIE, + idStateCIE => idStateCIE, + idStateCIL => idStateCIL, + idStateCIEZ => idStateCZIE, + idStateCILZ => idStateCZIL, + idStateCZIE => idStateCZIE, + idStateCZIL => idStateCZIL, + idStateCExt => idStateCExt, + _ => idStateOther, + }, + false); + transition( + state, + categoryExtendIndicLinked, + switch (state) { + idStateCAny => idStateCIL, + idStateCZWJ => idStateCZIL, + idStateCIE => idStateCIL, + idStateCIL => idStateCIL, + idStateCIEZ => idStateCZIL, + idStateCILZ => idStateCZIL, + idStateCZIE => idStateCZIL, + idStateCZIL => idStateCZIL, + idStateCExt => idStateCExt, + _ => idStateOther, + }, + false); } // Regional indicator. // GB12 + GB13: Don't break if after an odd number of Reg. @@ -145,11 +229,17 @@ void writeForwardAutomaton(StringSink buffer, {required bool verbose}) { // prior state says not to. 
if (state == idStateRegionalSingle) { transition(state, categoryRegionalIndicator, idStateOther, false); + } else if (state == idStateCAny) { + transition(state, categoryRegionalIndicator, idStateCReg, false); + } else if (state == idStateCReg) { + transitionLA(state, categoryRegionalIndicator, + idStateLookaheadRegionalEven, flagLookahead); } else { // Break unless prior state says not to. transition(state, categoryRegionalIndicator, idStateRegionalSingle, !neverBreakBefore); } + // Prepend. // GB9b: Never break after Prepend (unless required by next character // due to GB1..GB5). @@ -173,17 +263,30 @@ void writeForwardAutomaton(StringSink buffer, {required bool verbose}) { !(neverBreakBefore || state == idStateV || state == idStateT)); // Emoji // GB11. - transition( - state, - categoryPictographic, - idStatePictographic, - state != idStatePrepend && - state != idStatePictographicZWJ && - state != idStateSoTNoBreak); + if (state == idStateCZWJ || + state == idStateCExZ || + state == idStateCIEZ || + state == idStateCILZ) { + transitionLA(state, categoryPictographic, idStateLookaheadZWJPictographic, + flagLookahead); + } else { + transition( + state, + categoryPictographic, + idStatePictographic, + state != idStatePrepend && + state != idStatePictographicZWJ && + state != idStateSoTNoBreak); + } // End of input. // GB2. - transition(state, categoryEoT, idStateSoTNoBreak, - state != idStateEoT && state != idStateSoTNoBreak); + transition( + state, + categoryEoT, + idStateSoTNoBreak, + state != idStateSoT && + state != idStateSoTNoBreak && + state != idStateCAny); // Pad table if necessary. for (var c = categoryCount; c < automatonRowLength; c++) { @@ -249,28 +352,28 @@ const categories = [ // // Stored as string for comparison to actual generated automaton. 
const expectedBackAutomatonDescription = r""" -Cat : State - : Brk LF Otr Ext L V T Pic RegO Reg InC RegE EoTN EoT LAZP LAIC LAIL LARe LARo: ------------------------------------------------------------------------------------------------------- -CR : !Brk Brk !Brk !Brk !Brk !Brk !Brk !Brk - !Brk !Brk !Brk Brk !Brk #Ext #Ext #Ext RegE!RegO: -Ctl : !Brk !Brk !Brk !Brk !Brk !Brk !Brk !Brk - !Brk !Brk !Brk Brk !Brk #Ext #Ext #Ext RegE!RegO: -Otr : !Otr !Otr !Otr Otr !Otr !Otr !Otr !Otr - !Otr !Otr !Otr Otr !Otr !Otr !Otr !Otr RegE!RegO: -Ext : !Ext !Ext !Ext Ext !Ext !Ext !Ext !Ext - !Ext !Ext !Ext Ext !Ext LAZP!Ext !Ext RegE!RegO: -Spc : !Ext !Ext !Ext Ext !Ext !Ext !Ext !Ext - !Ext !Ext !Ext Ext !Ext !Ext !Ext !Ext RegE!RegO: -Reg : !Reg !Reg !Reg Reg !Reg !Reg !Reg !Reg RegE$LARe!Reg !RegO Reg !Reg !Reg !Reg !Reg LARo LARe: -Pic : !Pic !Pic !Pic Pic !Pic !Pic !Pic !Pic - !Pic !Pic !Pic Pic !Pic Pic !Pic !Pic RegE!RegO: -LF : !LF !LF !LF !LF !LF !LF !LF !LF - !LF !LF !LF LF !LF #Ext #Ext #Ext RegE!RegO: -Pre : !Otr !Otr Otr Otr Otr Otr Otr Otr - Otr Otr Otr Otr !Otr !Otr !Otr !Otr RegE!RegO: -L : !L !L !L L L L !L !L - !L !L !L L !L !L !L !L RegE!RegO: -V : !V !V !V V !V V V !V - !V !V !V V !V !V !V !V RegE!RegO: -T : !T !T !T T !T !T T !T - !T !T !T T !T !T !T !T RegE!RegO: -LV : !L !L !L L !L L L !L - !L !L !L L !L !L !L !L RegE!RegO: -LVT : !L !L !L L !L !L L !L - !L !L !L L !L !L !L !L RegE!RegO: -OInC: !InC !InC !InC InC !InC !InC !InC !InC - !InC !InC !InC InC !InC !InC !InC InC RegE!RegO: -ZWJ : !Ext !Ext !Ext Ext !Ext !Ext !Ext $LAZP - !Ext $LAIC!Ext Ext !Ext !Ext LAIC LAIL RegE!RegO: -EInE: !Ext !Ext !Ext Ext !Ext !Ext !Ext !Ext - !Ext $LAIC!Ext Ext !Ext LAZP LAIC LAIL RegE!RegO: -EInL: !Ext !Ext !Ext Ext !Ext !Ext !Ext !Ext - !Ext $LAIL!Ext Ext !Ext LAZP LAIL LAIL RegE!RegO: -SoT : ! - ! - ! - ! - ! - ! - ! - ! - - ! - ! - ! 
- - - #Ext #Ext #Ext RegE!RegO: +Stat: Cat + : CR Ctl Otr Ext Spc Reg Pic LF Pre L V T LV LVT OInC ZWJ EInE EInL SoT : +----------------------------------------------------------------------------------------------------- +Brk :!Brk !Brk !Otr !Ext !Ext !Reg !Pic !LF !Otr !L !V !T !L !L !InC !Ext !Ext !Ext ! - : +LF : Brk !Brk !Otr !Ext !Ext !Reg !Pic !LF !Otr !L !V !T !L !L !InC !Ext !Ext !Ext ! - : +Otr :!Brk !Brk !Otr !Ext !Ext !Reg !Pic !LF Otr !L !V !T !L !L !InC !Ext !Ext !Ext ! - : +Ext :!Brk !Brk Otr Ext Ext Reg Pic !LF Otr L V T L L InC Ext Ext Ext ! - : +L :!Brk !Brk !Otr !Ext !Ext !Reg !Pic !LF Otr L !V !T !L !L !InC !Ext !Ext !Ext ! - : +V :!Brk !Brk !Otr !Ext !Ext !Reg !Pic !LF Otr L V !T L !L !InC !Ext !Ext !Ext ! - : +T :!Brk !Brk !Otr !Ext !Ext !Reg !Pic !LF Otr !L V T L L !InC !Ext !Ext !Ext ! - : +Pic :!Brk !Brk !Otr !Ext !Ext !Reg !Pic !LF Otr !L !V !T !L !L !InC $LAZP!Ext !Ext ! - : +RegO: - - - - - RegE - - - - - - - - - - - - - : +Reg :!Brk !Brk !Otr !Ext !Ext $LARe!Pic !LF Otr !L !V !T !L !L !InC !Ext !Ext !Ext ! - : +InC :!Brk !Brk !Otr !Ext !Ext !Reg !Pic !LF Otr !L !V !T !L !L !InC $LAIC$LAIC$LAIL! - : +RegE:!Brk !Brk !Otr !Ext !Ext !RegO!Pic !LF Otr !L !V !T !L !L !InC !Ext !Ext !Ext ! 
- : +EoTN: Brk Brk Otr Ext Ext Reg Pic LF Otr L V T L L InC Ext Ext Ext - : +EoT :!Brk !Brk !Otr !Ext !Ext !Reg !Pic !LF !Otr !L !V !T !L !L !InC !Ext !Ext !Ext - : +LAZP:#Ext #Ext !Otr LAZP!Ext !Reg Pic #Ext !Otr !L !V !T !L !L !InC !Ext LAZP LAZP#Ext : +LAIC:#Ext #Ext !Otr !Ext !Ext !Reg !Pic #Ext !Otr !L !V !T !L !L !InC LAIC LAIC LAIL#Ext : +LAIL:#Ext #Ext !Otr !Ext !Ext !Reg !Pic #Ext !Otr !L !V !T !L !L InC LAIL LAIL LAIL#Ext : +LARe: RegE RegE RegE RegE RegE LARo RegE RegE RegE RegE RegE RegE RegE RegE RegE RegE RegE RegE RegE: +LARo:!RegO!RegO!RegO!RegO!RegO LARe!RegO!RegO!RegO!RegO!RegO!RegO!RegO!RegO!RegO!RegO!RegO!RegO!RegO: """; // The look-ahead part of the state machine is triggered by the `$`-transitions @@ -552,8 +655,14 @@ void writeBackwardAutomaton(StringSink buffer, {required bool verbose}) { } void _writeForwardTable(Uint16List table, int automatonRowLength) { - var automaton = _generateTable(table, automatonRowLength, idStateCount, - stateShortNames, categoryShortNames, idStateSoTNoBreak); + var automaton = _generateTable( + table, + automatonRowLength, + idStateCount, + stateShortNames, + backStateShortNames, + categoryShortNames, + idStateSoTNoBreak); stdout.write(automaton); if (automaton != expectedAutomatonDescription) { stderr @@ -569,6 +678,7 @@ void _writeBackTable(Uint16List table, int automatonRowLength) { automatonRowLength, backStateWithLACount, backStateShortNames, + backStateShortNames, backCategoryNames, idStateEoTNoBreak, ); @@ -582,7 +692,8 @@ void _writeBackTable(Uint16List table, int automatonRowLength) { /// Writes an automaton table to string, for debugging. /// -/// The table has size `maxState * automatonRowLength`, and `automatonRowLength >= categoryCount`. +/// The table has size `maxState * automatonRowLength`, +/// and `automatonRowLength >= categoryCount`. /// The [stateNames] are the names of the states for this particular automaton /// (differs between forward and backward automaton). 
/// It has a name for every target state that occurs in the *table*. @@ -592,38 +703,47 @@ void _writeBackTable(Uint16List table, int automatonRowLength) { /// states above that, if any, are synthetic states that trigger non- /// automaton based scanning. /// The [ignoreState] is a single state that is not displayed. -String _generateTable(Uint16List table, int automatonRowLength, int stateCount, - List stateNames, List categoryNames, int ignoreState) { +String _generateTable( + Uint16List table, + int automatonRowLength, + int stateCount, + List stateNames, + List lookaheadStateNames, + List categoryNames, + int ignoreState) { assert(automatonRowLength >= categoryCount); assert(table.length == stateCount * automatonRowLength); var buf = StringBuffer(); - buf.writeln("Cat : State"); + buf.writeln("Stat: Cat"); var preHeaderLength = buf.length; - buf.write(" : "); - for (var i = 0; i < stateCount; i++) { + buf.write(" :"); + for (var i = 0; i < categoryCount; i++) { buf ..write(' ') - ..write(stateNames[i].padRight(4)); + ..write(categoryNames[i].padRight(4)); } buf.writeln(":"); var lineLength = buf.length - preHeaderLength; buf.writeln("-" * (lineLength - 1)); - for (var ci = 0; ci < categoryCount; ci++) { - var catName = categoryNames[ci]; + for (var si = 0; si < stateCount; si++) { + var stateName = stateNames[si]; buf - ..write(catName.padRight(4)) - ..write(': '); - - for (var si = 0; si < stateCount; si++) { + ..write(stateName.padRight(4)) + ..write(':'); + for (var ci = 0; ci < categoryCount; ci++) { var value = table[si * automatonRowLength + ci]; var targetState = value ~/ automatonRowLength; - var prefix = r" !$#"[value & maskFlags]; - var idStateName = stateNames[targetState]; + var flags = value & maskFlags; + var prefix = r" !$#"[flags]; + + var targetStateName = (flags == flagLookahead) + ? lookaheadStateNames[targetState] + : stateNames[targetState]; // EoT is marker for unreachable states. 
- if (targetState == ignoreState) idStateName = " - "; + if (targetState == ignoreState) targetStateName = " - "; buf ..write(prefix) - ..write(idStateName.padRight(4)); + ..write(targetStateName.padRight(4)); } buf.writeln(":"); } diff --git a/pkgs/characters/tool/src/debug_names.dart b/pkgs/characters/tool/src/debug_names.dart index 22b4b2a1..a8a99591 100644 --- a/pkgs/characters/tool/src/debug_names.dart +++ b/pkgs/characters/tool/src/debug_names.dart @@ -88,7 +88,19 @@ final stateNames = List.filled(idStateCount, "") ..[idStateRegionalSingle] = "RegionalSingle" ..[idStateSoTNoBreak] = "SoTNoBreak" ..[idStateInC] = "InC" - ..[idStateInCL] = "InCL"; + ..[idStateInCL] = "InCL" + ..[idStateCAny] = "?" + ..[idStateCZWJ] = "?+ZWJ" + ..[idStateCIE] = "?+IndicExtend" + ..[idStateCIL] = "?+IndicLinked" + ..[idStateCIEZ] = "?+IndicExtendZWJ" + ..[idStateCILZ] = "?+IndicLinkedZWJ" + ..[idStateCZIE] = "?+ZWJIndicExtend" + ..[idStateCZIL] = "?+ZWJIndicLinked" + ..[idStateCExt] = "?+Extend" + ..[idStateCExZ] = "?+ExtendZWJ" + ..[idStateCReg] = "?+Reg" + ; final stateShortNames = List.filled(idStateCount, "") ..[idStateSoT] = "SoT" @@ -104,7 +116,19 @@ final stateShortNames = List.filled(idStateCount, "") ..[idStateRegionalSingle] = "Reg" ..[idStateSoTNoBreak] = "SoTN" ..[idStateInC] = "InC" - ..[idStateInCL] = "InCL"; + ..[idStateInCL] = "InCL" + ..[idStateCAny] = "CAny" + ..[idStateCZWJ] = "CZWJ" + ..[idStateCIE] = "CIE" + ..[idStateCIL] = "CIL" + ..[idStateCIEZ] = "CIEZ" + ..[idStateCILZ] = "CILZ" + ..[idStateCZIE] = "CZIE" + ..[idStateCZIL] = "CZIL" + ..[idStateCExt] = "CExt" + ..[idStateCExZ] = "CExZ" + ..[idStateCReg] = "CReg" + ; final backStateNames = List.filled(backStateWithLACount, "") ..[idStateEoT] = "EoT"