Skip to content

Commit

Permalink
Fix the Java old monkeys
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Nov 5, 2024
1 parent 435af1c commit 718c00f
Showing 1 changed file with 15 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ static class RBBICharMonkey extends RBBIMonkeyKind {
UnicodeSet fHangulSet;
UnicodeSet fZWJSet;
UnicodeSet fExtendedPictSet;
UnicodeSet fViramaSet;
UnicodeSet fLinkingConsonantSet;
UnicodeSet fExtCccZwjSet;
UnicodeSet fInCBLinkerSet;
UnicodeSet fInCBConsonantSet;
UnicodeSet fInCBExtendSet;
UnicodeSet fAnySet;


Expand Down Expand Up @@ -176,11 +176,9 @@ static class RBBICharMonkey extends RBBIMonkeyKind {
fHangulSet.addAll(fLVTSet);

fExtendedPictSet = new UnicodeSet("[:Extended_Pictographic:]");
fViramaSet = new UnicodeSet("[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
+ "\\p{Indic_Syllabic_Category=Virama}]");
fLinkingConsonantSet = new UnicodeSet("[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
+ "\\p{Indic_Syllabic_Category=Consonant}]");
fExtCccZwjSet = new UnicodeSet("[[\\p{gcb=Extend}-\\p{ccc=0}] \\p{gcb=ZWJ}]");
fInCBLinkerSet = new UnicodeSet("[\\p{InCB=Linker}]");
fInCBConsonantSet = new UnicodeSet("[[\\p{InCB=Consonant}]");
fInCBExtendSet = new UnicodeSet("[\\p{InCB=Extend}]");
fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]");


Expand All @@ -196,9 +194,9 @@ static class RBBICharMonkey extends RBBIMonkeyKind {
fSets.add(fAnySet); fClassNames.add("Any");
fSets.add(fZWJSet); fClassNames.add("ZWJ");
fSets.add(fExtendedPictSet); fClassNames.add("ExtendedPict");
fSets.add(fViramaSet); fClassNames.add("Virama");
fSets.add(fLinkingConsonantSet); fClassNames.add("LinkingConsonant");
fSets.add(fExtCccZwjSet); fClassNames.add("ExtCccZwj");
fSets.add(fInCBLinkerSet); fClassNames.add("InCB=Linker");
fSets.add(fInCBConsonantSet); fClassNames.add("InCB=Consonant");
fSets.add(fInCBExtendSet); fClassNames.add("InCB=Extend");
}


Expand Down Expand Up @@ -315,17 +313,18 @@ int next(int prevPos) {
}

// Note: Viramas are also included in the ExtCccZwj class.
if (fLinkingConsonantSet.contains(c2)) {
if (fInCBConsonantSet.contains(c2)) {
int pi = p1;
boolean sawVirama = false;
while (pi > 0 && fExtCccZwjSet.contains(fText.codePointAt(pi))) {
if (fViramaSet.contains(fText.codePointAt(pi))) {
while (pi > 0 && (fInCBExtendSet.contains(fText.codePointAt(pi)) ||
fInCBLinkerSet.contains(fText.codePointAt(pi)))) {
if (fInCBLinkerSet.contains(fText.codePointAt(pi))) {
sawVirama = true;
}
pi = fText.offsetByCodePoints(pi, -1);
}
if (sawVirama && fLinkingConsonantSet.contains(fText.codePointAt(pi))) {
setAppliedRule(p2, "GB 9.3 LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant");
if (sawVirama && fInCBConsonantSet.contains(fText.codePointAt(pi))) {
setAppliedRule(p2, "GB9c \\p{InCB=Consonant} [ \\p{InCB=Extend} \\p{InCB=Linker} ]* \\p{InCB=Linker} [ \\p{InCB=Extend} \\p{InCB=Linker} ]* × \\p{InCB=Consonant})");
continue;
}
}
Expand Down

0 comments on commit 718c00f

Please sign in to comment.