Skip to content

Commit

Permalink
ICU-22707 UTC-179-A102 Consider using a macro throughout the rules fo…
Browse files Browse the repository at this point in the history
…r [\p{ea=F}\p{ea=W}\p{ea=H}].
  • Loading branch information
eggrobin committed Jul 16, 2024
1 parent e52a408 commit 793a0db
Showing 1 changed file with 10 additions and 15 deletions.
25 changes: 10 additions & 15 deletions icu4c/source/data/brkitr/rules/line.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,7 @@ $XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [:LineBreak = ZWJ:];

# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
# without a formal name. Because ICU rules require multiple uses of the expressions,
# give them a single definition with a name

$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];

$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];

Expand Down Expand Up @@ -122,7 +117,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM]; # Bases that can't take CMs
# AL_FOLLOW set of chars that can unconditionally follow an AL
# Needed in rules where stand-alone $CM s are treated as AL.
#
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


#
Expand Down Expand Up @@ -284,12 +279,12 @@ $LB18Breaks = [$LB8Breaks $SP];
$LB18NonBreaks $CM* $QU;
^$CM+ $QU;

[$LB18NonBreaks & [\p{ea=F}\p{ea=W}\p{ea=H}] - [$OP $GL]] / [\p{Pi} & $QU] $CM* [ [\p{ea=F}\p{ea=W}\p{ea=H}] - $CM];
[$LB18NonBreaks & [\p{ea=F}\p{ea=W}\p{ea=H}] - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ [\p{ea=F}\p{ea=W}\p{ea=H}] - $CM];
[$LB18NonBreaks & $EastAsian - [$OP $GL]] / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];

$QU $CM* .;
[$LB18NonBreaks & [\p{ea=F}\p{ea=W}\p{ea=H}]] $CM* [\p{Pf} & $QU] / [ [\p{ea=F}\p{ea=W}\p{ea=H}] - [$NS $BA $EX $CL $IN $IS $GL $CM]];
[$LB18NonBreaks & [\p{ea=F}\p{ea=W}\p{ea=H}]] $CM* [\p{Pf} & $QU] $CM* $CMX / [ [\p{ea=F}\p{ea=W}\p{ea=H}] - [$NS $BA $EX $CL $IN $IS $GL $CM]];
[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];

# LB 20
# <break> $CB
Expand Down Expand Up @@ -329,7 +324,7 @@ $BB $CM* $LB20NonBreaks;
# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
# HL (HY | BA) x [^HL]
#
$HL $CM* ($HY | [ $BA - [\p{ea=F}\p{ea=W}\p{ea=H}] ] ) $CM* [^$CB $HL]?;
$HL $CM* ($HY | [ $BA - $EastAsian ] ) $CM* [^$CB $HL]?;

# LB 21b (forward) Don't break between SY and HL
# (break between HL and SY already disallowed by LB 13 above)
Expand Down Expand Up @@ -389,9 +384,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
$IS $CM* ($ALPlus | $HL);

# LB 30
($ALPlus | $HL | $NU) $CM* $OP30;
^$CM+ $OP30; # The $CM+ is from rule 10, an unattached CM is treated as AL.
$CP30 $CM* ($ALPlus | $HL | $NU);
($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
^$CM+ [$OP - $EastAsian]; # The $CM+ is from rule 10, an unattached CM is treated as AL.
[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);

# LB 30a Do not break between regional indicators. Break after pairs of them.
# Tricky interaction with LB8a: ZWJ x . together with ZWJ acting like a CM.
Expand Down

0 comments on commit 793a0db

Please sign in to comment.