Skip to content

Commit

Permalink
Merge remote-tracking branch 'valloric/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
puremourning committed Dec 15, 2023
2 parents 5423c7d + 0a276f7 commit c01d330
Show file tree
Hide file tree
Showing 22 changed files with 1,704 additions and 781 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
[submodule "third_party/watchdog"]
path = third_party/watchdog_deps/watchdog
url = https://github.com/gorakhargosh/watchdog
[submodule "third_party/mrab-regex"]
path = third_party/mrab-regex
url = https://bitbucket.org/mrabarnett/mrab-regex.git
[submodule "third_party/mrab-regex-github"]
path = third_party/mrab-regex-github
url = https://github.com/mrabarnett/mrab-regex
2 changes: 1 addition & 1 deletion CORE_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
47
48
2 changes: 1 addition & 1 deletion build.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ def BuildRegexModule( script_args ):
lib_dir = p.join( DIR_OF_THIRD_PARTY, 'regex-build' )

try:
os.chdir( p.join( DIR_OF_THIRD_PARTY, 'mrab-regex' ) )
os.chdir( p.join( DIR_OF_THIRD_PARTY, 'mrab-regex-github' ) )

RemoveDirectoryIfExists( build_dir )
RemoveDirectoryIfExists( lib_dir )
Expand Down
14 changes: 7 additions & 7 deletions cpp/ycm/Character.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ bool CodePointCompare( const CodePoint *left, const CodePoint *right ) {


// Sort the code points according to the Canonical Ordering Algorithm.
// See https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G49591
// See https://www.unicode.org/versions/latest/ch03.pdf#G49591
CodePointSequence CanonicalSort( CodePointSequence code_points ) {
auto code_point_start = code_points.begin();
auto code_point_end = code_points.end();
Expand Down Expand Up @@ -64,7 +64,7 @@ CodePointSequence CanonicalSort( CodePointSequence code_points ) {

// Decompose a UTF-8 encoded string into a sequence of code points according to
// Canonical Decomposition. See
// https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G733
// https://www.unicode.org/versions/latest/ch03.pdf#G733
CodePointSequence CanonicalDecompose( std::string_view text ) {
assert( NormalizeInput( text ) == text );
return CanonicalSort( BreakIntoCodePoints( text ) );
Expand All @@ -78,7 +78,7 @@ Character::Character( std::string_view character )
is_punctuation_( false ),
is_uppercase_( false ) {
// Normalize the character through NFD (Normalization Form D). See
// https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G49621
// https://www.unicode.org/versions/latest/ch03.pdf#G49621
CodePointSequence code_points = CanonicalDecompose( character );

for ( const auto &code_point : code_points ) {
Expand All @@ -89,10 +89,10 @@ Character::Character( std::string_view character )
is_punctuation_ |= code_point->IsPunctuation();
is_uppercase_ |= code_point->IsUppercase();

switch ( code_point->GetBreakProperty() ) {
case BreakProperty::PREPEND:
case BreakProperty::EXTEND:
case BreakProperty::SPACINGMARK:
switch ( code_point->GetGraphemeBreakProperty() ) {
case GraphemeBreakProperty::PREPEND:
case GraphemeBreakProperty::EXTEND:
case GraphemeBreakProperty::SPACINGMARK:
is_base_ = false;
break;
default:
Expand Down
2 changes: 1 addition & 1 deletion cpp/ycm/Character.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace YouCompleteMe {
// This class represents a UTF-8 character. It takes a UTF-8 encoded string
// corresponding to a grapheme cluster (see
// https://www.unicode.org/glossary/#grapheme_cluster), normalizes it through NFD
// (see https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G49621), and
// (see https://www.unicode.org/versions/latest/ch03.pdf#G49621), and
// computes the folded and swapped case versions of the normalized character. It
// also holds some properties, such as whether the character is a letter or
// punctuation, and whether it is uppercase.
Expand Down
4 changes: 2 additions & 2 deletions cpp/ycm/ClangCompleter/TranslationUnitStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ shared_ptr< TranslationUnit > TranslationUnitStore::Get(

bool TranslationUnitStore::Remove( const std::string &filename ) {
lock_guard< mutex > lock( filename_to_translation_unit_and_flags_mutex_ );
Erase( filename_to_flags_hash_, filename );
return Erase( filename_to_translation_unit_, filename );
filename_to_flags_hash_.erase( filename );
return filename_to_translation_unit_.erase( filename );
}


Expand Down
13 changes: 8 additions & 5 deletions cpp/ycm/CodePoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@ RawCodePoint FindCodePoint( std::string_view text ) {
code_points.is_punctuation[ index ],
code_points.is_uppercase[ index ],
code_points.break_property[ index ],
code_points.combining_class[ index ] };
code_points.combining_class[ index ],
code_points.indic_conjunct_break[ index ] };
}

return { text, text, text, text, false, false, false, 0, 0 };
return { text, text, text, text, false, false, false, 0, 0, 0 };
}

} // unnamed namespace
Expand All @@ -87,9 +88,11 @@ CodePoint::CodePoint( RawCodePoint&& code_point )
is_letter_( code_point.is_letter ),
is_punctuation_( code_point.is_punctuation ),
is_uppercase_( code_point.is_uppercase ),
break_property_(
static_cast< BreakProperty >( code_point.break_property ) ),
combining_class_( code_point.combining_class ) {
grapheme_break_property_(
static_cast< GraphemeBreakProperty >( code_point.grapheme_break_property ) ),
combining_class_( code_point.combining_class ),
indic_conjunct_break_property_(
static_cast< IndicConjunctBreakProperty >( code_point.indic_conjunct_break_property ) ) {
}


Expand Down
29 changes: 22 additions & 7 deletions cpp/ycm/CodePoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
namespace YouCompleteMe {

// See
// http://www.unicode.org/reports/tr29/tr29-37.html#Grapheme_Cluster_Break_Property_Values
// http://www.unicode.org/reports/tr29#Grapheme_Cluster_Break_Property_Values
// NOTE: The properties must take the same value as the ones defined in the
// update_unicode.py script.
enum class BreakProperty : uint8_t {
enum class GraphemeBreakProperty : uint8_t {
OTHER = 0,
CR = 1,
LF = 2,
Expand All @@ -46,6 +46,15 @@ enum class BreakProperty : uint8_t {
LVT = 13,
EXTPICT = 18
};
// Indic_Conjunct_Break property of a code point.
// See https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break
// The property is stored as a raw uint8_t in
// RawCodePoint::indic_conjunct_break_property and converted to this enum with a
// static_cast by the CodePoint constructor, so the numeric values below are
// part of the data format and must not be renumbered independently.
// NOTE: The properties must take the same value as the ones defined in the
// update_unicode.py script.
enum class IndicConjunctBreakProperty : uint8_t {
// 0 is also the value given to code points that are not found in the Unicode
// table.
None = 0,
LINKER = 1,
CONSONANT = 2,
EXTEND = 3,
};


// This is the structure used to store the data in the Unicode table. See the
Expand All @@ -58,8 +67,9 @@ struct RawCodePoint {
bool is_letter;
bool is_punctuation;
bool is_uppercase;
uint8_t break_property;
uint8_t grapheme_break_property;
uint8_t combining_class;
uint8_t indic_conjunct_break_property;
};


Expand All @@ -81,7 +91,7 @@ struct RawCodePoint {
// - its breaking property: used to split a word into characters.
// - its combining class: used to sort a sequence of code points according to
// the Canonical Ordering algorithm (see
// https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G49591).
// https://www.unicode.org/versions/latest/ch03.pdf#G49591).
class CodePoint {
public:
YCM_EXPORT explicit CodePoint( std::string_view code_point );
Expand Down Expand Up @@ -115,14 +125,18 @@ class CodePoint {
return is_uppercase_;
}

inline BreakProperty GetBreakProperty() const {
return break_property_;
inline GraphemeBreakProperty GetGraphemeBreakProperty() const {
return grapheme_break_property_;
}

inline uint8_t CombiningClass() const {
return combining_class_;
}

inline IndicConjunctBreakProperty GetIndicConjunctBreakProperty() const {
return indic_conjunct_break_property_;
}

inline bool operator< ( const CodePoint &other ) const {
return combining_class_ < other.combining_class_;
}
Expand All @@ -136,8 +150,9 @@ class CodePoint {
bool is_letter_;
bool is_punctuation_;
bool is_uppercase_;
BreakProperty break_property_;
GraphemeBreakProperty grapheme_break_property_;
uint8_t combining_class_;
IndicConjunctBreakProperty indic_conjunct_break_property_;
};


Expand Down
40 changes: 21 additions & 19 deletions cpp/ycm/UnicodeTable.inc

Large diffs are not rendered by default.

13 changes: 0 additions & 13 deletions cpp/ycm/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,19 +99,6 @@ FindWithDefault( Container &container,
}


// Removes the element stored under |key| from |container|, if there is one.
// At most one element is removed: the one returned by container.find( key ).
// Returns true if an element was removed and false if |key| was not present.
template <class Container, class Key>
bool Erase( Container &container, const Key &key ) {
  const auto position = container.find( key );
  const bool found = ( position != container.end() );

  if ( found ) {
    container.erase( position );
  }

  return found;
}


// Shrink a vector to its sorted |num_sorted_elements| smallest elements. If
// |num_sorted_elements| is 0 or larger than the vector size, sort the whole
// vector.
Expand Down
Loading

0 comments on commit c01d330

Please sign in to comment.