Skip to content

Commit

Permalink
Change GetLaTeXInputUnicodeCharacter() to return UTF-16 characters d…
Browse files Browse the repository at this point in the history
…irectly

to avoid extra UTF-32 to UTF-16 conversion, issue zufuliu#289.
  • Loading branch information
zufuliu committed Feb 28, 2021
1 parent 3f09f65 commit 85302ea
Show file tree
Hide file tree
Showing 6 changed files with 1,720 additions and 1,731 deletions.
4 changes: 2 additions & 2 deletions scintilla/include/LaTeXInput.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ extern "C" {
#define EnableLaTeXLikeEmojiInput 1

//++Autogenerated -- start of section automatically generated
// input sequences based on Julia version 1.7.0-DEV.623 (Saturday 27 February 2021),
// input sequences based on Julia version 1.7.0-DEV.625 (Saturday 27 February 2021),
// documented at https://docs.julialang.org/en/v1/manual/unicode-input/

enum {
Expand Down Expand Up @@ -53,7 +53,7 @@ extern const char * const kAllEmojiInputSequences;
#endif

/*!
* @brief Get Unicode characters for LaTeX or Emoji input sequence.
* @brief Get Unicode UTF-16 characters for LaTeX or Emoji input sequence.
* example: \sum to U+2211 ∑, \:laughing: to U+1F606 😆 and \gvertneqq to U+2269 + U+FE00 ≩︀.
* @param sequence The input sequence without the prefix '\', sequence[0] == ':' indicates Emoji.
* @param length Length for the input sequence without the prefix '\'.
Expand Down
9 changes: 7 additions & 2 deletions scintilla/scripts/LaTeXInput.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def find_word_contains_punctuation(items):
result.sort()
return result


# Serialize obj to JSON text: non-ASCII characters are written as-is
# (ensure_ascii=False) and nested levels are indented with one tab.
def json_dump(obj):
return json.dumps(obj, ensure_ascii=False, indent='\t')

Expand Down Expand Up @@ -163,14 +162,20 @@ def update_latex_input_data(input_name, input_map, max_hash_size):
if input_name == 'Emoji':
prefix = '\\:'
suffix = ':'
# see https://www.unicode.org/faq/utf_bom.html
LEAD_OFFSET = 0xD800 - (0x10000 >> 10)
for info in input_list:
character = info['character']
if len(character) == 1:
ch = ord(character)
if ch <= 0xffff:
code = '0x%04X' % ch
else:
code = '0x%X' % ch
character = ('U+%X, ' % ch) + character
# convert to UTF-16
lead = LEAD_OFFSET + (ch >> 10)
trail = 0xDC00 + (ch & 0x3FF)
code = "0x%04X'%04X" % (trail, lead)
else:
code = "0x%04X'%04X" % (ord(character[1]), ord(character[0]))
magic = info['magic']
Expand Down
16 changes: 0 additions & 16 deletions scintilla/src/UniConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ enum {
SURROGATE_TRAIL_FIRST = 0xDC00,
SURROGATE_TRAIL_LAST = 0xDFFF,
SUPPLEMENTAL_PLANE_FIRST = 0x10000,
MAX_UNICODE = 0x10ffff,
};

constexpr unsigned int UTF16CharLength(wchar_t uch) noexcept {
Expand All @@ -129,19 +128,4 @@ inline unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noe
return 2;
}

// Expand one LaTeX/Emoji input value into wchar_t units stored in tbuf and
// return how many units were written (1 or 2). tbuf[1] is written whenever
// 2 is returned, so the caller must supply room for two units.
//  - val below SUPPLEMENTAL_PLANE_FIRST is stored unchanged as one unit.
//  - val up to MAX_UNICODE is split into a lead/trail surrogate pair.
//  - any larger val is treated as two 16-bit halves packed into one value,
//    low half first (presumably a pre-encoded two-unit sequence; confirm
//    against the table generator).
inline unsigned int UTF16FromLaTeXInputCharacter(unsigned int val, wchar_t *tbuf) noexcept {
	if (val >= SUPPLEMENTAL_PLANE_FIRST) {
		const bool packedPair = val > MAX_UNICODE;
		const unsigned int first = packedPair
			? (val & 0xffff)
			: (((val - SUPPLEMENTAL_PLANE_FIRST) >> 10) + SURROGATE_LEAD_FIRST);
		const unsigned int second = packedPair
			? (val >> 16)
			: ((val & 0x3ff) + SURROGATE_TRAIL_FIRST);
		tbuf[0] = static_cast<wchar_t>(first);
		tbuf[1] = static_cast<wchar_t>(second);
		return 2;
	}
	// Single unit: the value fits below the supplementary planes.
	tbuf[0] = static_cast<wchar_t>(val);
	return 1;
}

}
2 changes: 1 addition & 1 deletion scintilla/win32/LaTeXInput.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ struct InputSequence {

// Return the element count N of a built-in array. N is deduced from the
// array reference type, so the argument itself is never read.
// NOTE(review): the two return statements below look like the removed and
// added sides of a whitespace-only diff hunk; confirm only one is in the file.
template <typename T, uint32_t N>
constexpr uint32_t array_size([[maybe_unused]] const T (&a)[N]) noexcept {
return N ;
return N;
}

}
Expand Down
Loading

0 comments on commit 85302ea

Please sign in to comment.