Skip to content

Commit

Permalink
Fix decoding of UTF-8 sequence length (fix jqlang#922)
Browse files Browse the repository at this point in the history
  • Loading branch information
dtolnay committed Aug 22, 2015
1 parent 370833d commit e975c19
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
9 changes: 5 additions & 4 deletions jv_unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,12 @@ int jvp_utf8_is_valid(const char* in, const char* end) {
return 1;
}

/*
 * Returns the total length in bytes (1-4) of the UTF-8 sequence introduced by
 * startchar, determined from the high bits of the lead byte.
 *
 * Assumes startchar is the first byte of a valid character sequence; any byte
 * that does not match the 1-, 2- or 3-byte lead patterns is reported as 4.
 */
int jvp_utf8_decode_length(char startchar) {
  /* Plain char may be signed; work on the raw byte value. */
  unsigned char lead = (unsigned char)startchar;

  /*
   * Each mask must cover the marker bits AND the terminating 0 bit.
   * Testing (lead & 0xC0) == 0xC0 would also match 3- and 4-byte lead bytes.
   */
  if ((lead & 0x80) == 0x00) return 1;  // 0___ ____
  if ((lead & 0xE0) == 0xC0) return 2;  // 110_ ____
  if ((lead & 0xF0) == 0xE0) return 3;  // 1110 ____
  return 4;                             // 1111 ____
}

int jvp_utf8_encode_length(int codepoint) {
Expand Down
5 changes: 5 additions & 0 deletions tests/onig.test
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,8 @@ gsub("(?<d>\\d)"; ":\(.d);")
gsub("(?<x>.)[^a]*"; "+\(.x)-")
"Abcabc"
"+A-+a-"

# utf-8
sub( "(?<x>.)"; "\(.x)!")
"’"
"’!"

0 comments on commit e975c19

Please sign in to comment.