Skip to content

Commit

Permalink
1615: Fix bug in getCJKAdjustedLength() that double counts supplement…
Browse files Browse the repository at this point in the history
…ary code points

getCJKAdjustedLength calculates the column width of text strings.

But it operates on chars which causes it to double count supplementary code
points. Supplimentary code points have a high and a low code unit(char).

This commit reworks this logic to be based on code points. The code assembles
the code points by hand as String.codePoints() was only added in 1.8 and
picocli has a source level of 1.5
  • Loading branch information
gwalbran committed Feb 27, 2022
1 parent 8e21b71 commit 4ccad5b
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 8 deletions.
33 changes: 25 additions & 8 deletions src/main/java/picocli/CommandLine.java
Original file line number Diff line number Diff line change
Expand Up @@ -7921,16 +7921,16 @@ public void run() {
* @since 4.0 */
public UsageMessageSpec autoWidth(boolean detectTerminalSize) { autoWidth = detectTerminalSize; return this; }
/**
* Given a character, is this character considered to be a CJK character?
* Given a codePoint, is this codePoint considered to be a CJK character?
* Shamelessly stolen from
* <a href="http://stackoverflow.com/questions/1499804/how-can-i-detect-japanese-text-in-a-java-string">StackOverflow</a>
* where it was contributed by user Rakesh N. (Upvote! :-) )
* @param c Character to test
* @param codePoint code point to test
* @return {@code true} if the character is a CJK character
*/
static boolean isCharCJK(char c) {
Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(c);
return (c == 0x00b1
static boolean isCodePointCJK(int codePoint) {
Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(codePoint);
return (codePoint == 0x00b1
|| unicodeBlock == Character.UnicodeBlock.HIRAGANA)
|| (unicodeBlock == Character.UnicodeBlock.KATAKANA)
|| (unicodeBlock == Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS)
Expand All @@ -7946,7 +7946,7 @@ static boolean isCharCJK(char c) {
|| (unicodeBlock == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION)
|| (unicodeBlock == Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS)
//The magic number here is the separating index between full-width and half-width
|| (unicodeBlock == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS && c < 0xFF61);
|| (unicodeBlock == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS && codePoint < 0xFF61);
}

/** Returns the help section renderers for the predefined section keys. see: {@link #sectionKeys()} */
Expand Down Expand Up @@ -18106,10 +18106,27 @@ public int getCJKAdjustedLength() {
* @return the number of columns that the specified portion of this Text will occupy on the console, adjusted for wide CJK characters
* @since 4.0 */
public int getCJKAdjustedLength(int fromPosition, int charCount) {
String lengthOf = plain.substring(from, from + charCount);

int result = 0;
for (int i = fromPosition; i < fromPosition + charCount; i++) {
result += UsageMessageSpec.isCharCJK(plain.charAt(i)) ? 2 : 1;
int i = 0;
while (i < lengthOf.length()) {
int codePoint;
char c1 = lengthOf.charAt(i++);
if (!Character.isHighSurrogate(c1) || i >= length) {
codePoint = c1;
} else {
char c2 = lengthOf.charAt(i);
if (Character.isLowSurrogate(c2)) {
i++;
codePoint = Character.toCodePoint(c1, c2);
} else {
codePoint = c1;
}
}
result += UsageMessageSpec.isCodePointCJK(codePoint) ? 2 : 1;
}

return result;
}
}
Expand Down
39 changes: 39 additions & 0 deletions src/test/java/picocli/CJKLengthTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

package picocli;

import static org.junit.Assert.assertEquals;
import static picocli.CommandLine.Help.Ansi;

import org.junit.Rule;
import org.junit.Test;
import org.junit.contrib.java.lang.system.ProvideSystemProperty;
import org.junit.contrib.java.lang.system.RestoreSystemProperties;
import org.junit.rules.TestRule;

public class CJKLengthTest {

// allows tests to set any kind of properties they like, without having to individually roll them back
@Rule
public final TestRule restoreSystemProperties = new RestoreSystemProperties();

@Rule
public final ProvideSystemProperty ansiOFF = new ProvideSystemProperty("picocli.ansi", "false");

@Test
public void testCJKLengths() {
testLength("abc", 3);
// some double width Hiragana characters
testLength("平仮名", 6);

// a supplementary code point character (has a high and low code point values)
testLength("𝑓", 1);
}

private void testLength(String of, int expectedLength) {
Ansi.Text text = Ansi.OFF.text(of);
int cjkWidth = text.getCJKAdjustedLength();
assertEquals(String.format("Expected '%s' to have width %d but is %d", of, expectedLength, cjkWidth),
expectedLength, cjkWidth);
}

}

0 comments on commit 4ccad5b

Please sign in to comment.