diff --git a/cmake/core-files.cmake b/cmake/core-files.cmake index 45305fdd65a..7936eead265 100644 --- a/cmake/core-files.cmake +++ b/cmake/core-files.cmake @@ -439,6 +439,8 @@ set(MBGL_CORE_FILES src/mbgl/util/http_header.hpp src/mbgl/util/http_timeout.cpp src/mbgl/util/http_timeout.hpp + src/mbgl/util/i18n.cpp + src/mbgl/util/i18n.hpp src/mbgl/util/interpolate.hpp src/mbgl/util/intersection_tests.cpp src/mbgl/util/intersection_tests.hpp diff --git a/package.json b/package.json index c598a9472d3..a6e8b5c83de 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,7 @@ "lodash": "^4.16.4", "mapbox-gl-shaders": "mapbox/mapbox-gl-shaders#ec891ce5360e488d81f60991f95d2038b83c4e3c", "mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#7f62a4fc9f21e619824d68abbc4b03cbc1685572", - "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#af9ee275f19e81f839a2733e6906c3fac272620e", + "mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#2f6453e17cd3d17ab2ff677056f65b9cab70f2e8", "mkdirp": "^0.5.1", "node-cmake": "^1.2.1", "request": "^2.72.0", diff --git a/platform/ios/CHANGELOG.md b/platform/ios/CHANGELOG.md index cc1f050af04..24ef0fb595c 100644 --- a/platform/ios/CHANGELOG.md +++ b/platform/ios/CHANGELOG.md @@ -21,6 +21,7 @@ Mapbox welcomes participation and contributions from everyone. Please read [CONT * TileJSON manifests can now specify `"scheme": "tms"` to indicate the use of [TMS](https://en.wikipedia.org/wiki/Tile_Map_Service) coordinates. ([#2270](https://github.com/mapbox/mapbox-gl-native/pull/2270)) * Fixed an issue causing abstract MGLMultiPointFeature objects to be returned in feature query results. Now concrete MGLPointCollectionFeature objects are returned. ([#6742](https://github.com/mapbox/mapbox-gl-native/pull/6742)) * Fixed rendering artifacts and missing glyphs that occurred after viewing a large number of CJK characters on the map. 
([#5908](https://github.com/mapbox/mapbox-gl-native/pull/5908)) +* Improved the line wrapping behavior of point-placed labels written in Chinese, Japanese, and Yi. ([#6828](https://github.com/mapbox/mapbox-gl-native/pull/6828)) * `-[MGLMapView resetPosition]` now resets to the current style’s default center coordinates, zoom level, direction, and pitch, if specified. ([#6127](https://github.com/mapbox/mapbox-gl-native/pull/6127)) * Fixed an issue where feature querying sometimes failed to return the expected features when the map was tilted. ([#6773](https://github.com/mapbox/mapbox-gl-native/pull/6773)) * MGLFeature’s `attributes` and `identifier` properties are now writable. ([#6728](https://github.com/mapbox/mapbox-gl-native/pull/6728)) diff --git a/platform/macos/CHANGELOG.md b/platform/macos/CHANGELOG.md index 9f08e5de9d8..de33a158b4b 100644 --- a/platform/macos/CHANGELOG.md +++ b/platform/macos/CHANGELOG.md @@ -17,6 +17,7 @@ * TileJSON manifests can now specify `"scheme": "tms"` to indicate the use of [TMS](https://en.wikipedia.org/wiki/Tile_Map_Service) coordinates. ([#2270](https://github.com/mapbox/mapbox-gl-native/pull/2270)) * Fixed an issue causing abstract `MGLMultiPointFeature` objects to be returned in feature query results. Now concrete `MGLPointCollectionFeature` objects are returned. ([#6742](https://github.com/mapbox/mapbox-gl-native/pull/6742)) * Fixed rendering artifacts and missing glyphs that occurred after viewing a large number of CJK characters on the map. ([#5908](https://github.com/mapbox/mapbox-gl-native/pull/5908)) +* Improved the line wrapping behavior of point-placed labels written in Chinese, Japanese, and Yi. ([#6828](https://github.com/mapbox/mapbox-gl-native/pull/6828)) * Fixed an issue where the style zoom levels were not respected when deciding when to render a layer. 
([#5811](https://github.com/mapbox/mapbox-gl-native/issues/5811)) * Fixed an issue where feature querying sometimes failed to return the expected features when the map was tilted. ([#6773](https://github.com/mapbox/mapbox-gl-native/pull/6773)) * MGLFeature’s `attributes` and `identifier` properties are now writable. ([#6728](https://github.com/mapbox/mapbox-gl-native/pull/6728)) diff --git a/src/mbgl/text/glyph_set.cpp b/src/mbgl/text/glyph_set.cpp index 0875a83850d..c778de207b5 100644 --- a/src/mbgl/text/glyph_set.cpp +++ b/src/mbgl/text/glyph_set.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include @@ -54,7 +55,8 @@ const Shaping GlyphSet::getShaping(const std::u32string &string, const float max if (shaping.positionedGlyphs.empty()) return shaping; - lineWrap(shaping, lineHeight, maxWidth, horizontalAlign, verticalAlign, justify, translate); + lineWrap(shaping, lineHeight, maxWidth, horizontalAlign, verticalAlign, justify, translate, + util::i18n::allowsIdeographicBreaking(string)); return shaping; } @@ -85,9 +87,10 @@ void justifyLine(std::vector &positionedGlyphs, const std::map< } } -void GlyphSet::lineWrap(Shaping &shaping, const float lineHeight, const float maxWidth, - const float horizontalAlign, const float verticalAlign, - const float justify, const Point &translate) const { +void GlyphSet::lineWrap(Shaping &shaping, const float lineHeight, float maxWidth, + const float horizontalAlign, const float verticalAlign, + const float justify, const Point &translate, + bool useBalancedIdeographicBreaking) const { uint32_t lastSafeBreak = 0; uint32_t lengthBeforeCurrentLine = 0; @@ -99,6 +102,12 @@ void GlyphSet::lineWrap(Shaping &shaping, const float lineHeight, const float ma std::vector &positionedGlyphs = shaping.positionedGlyphs; if (maxWidth) { + if (useBalancedIdeographicBreaking) { + auto lastPositionedGlyph = positionedGlyphs[positionedGlyphs.size() - 1]; + uint32_t estimatedLineCount = std::fmax(1, std::ceil(lastPositionedGlyph.x / 
maxWidth)); + maxWidth = lastPositionedGlyph.x / estimatedLineCount; + } + for (uint32_t i = 0; i < positionedGlyphs.size(); i++) { PositionedGlyph &shape = positionedGlyphs[i]; @@ -119,8 +128,7 @@ void GlyphSet::lineWrap(Shaping &shaping, const float lineHeight, const float ma // Collapse invisible characters. uint32_t breakGlyph = positionedGlyphs[lastSafeBreak].glyph; uint32_t lineEnd = lastSafeBreak; - if (breakGlyph == 0x20 /* space */ - || breakGlyph == 0x200b /* zero-width space */) { + if (util::i18n::isVisible(breakGlyph)) { lineEnd--; } @@ -133,17 +141,10 @@ void GlyphSet::lineWrap(Shaping &shaping, const float lineHeight, const float ma line++; } - // Spaces, plus word-breaking punctuation that often appears without surrounding spaces. - if (shape.glyph == 0x20 /* space */ - || shape.glyph == 0x26 /* ampersand */ - || shape.glyph == 0x2b /* plus sign */ - || shape.glyph == 0x2d /* hyphen-minus */ - || shape.glyph == 0x2f /* solidus */ - || shape.glyph == 0xad /* soft hyphen */ - || shape.glyph == 0xb7 /* middle dot */ - || shape.glyph == 0x200b /* zero-width space */ - || shape.glyph == 0x2010 /* hyphen */ - || shape.glyph == 0x2013 /* en dash */) { + // Ideographic characters, spaces, and word-breaking punctuation that often appear without surrounding spaces. 
+ if (useBalancedIdeographicBreaking + || util::i18n::allowsWordBreaking(shape.glyph) + || util::i18n::allowsIdeographicBreaking(shape.glyph)) { lastSafeBreak = i; } } diff --git a/src/mbgl/text/glyph_set.hpp b/src/mbgl/text/glyph_set.hpp index 37ffdb070a2..fed7960a5f7 100644 --- a/src/mbgl/text/glyph_set.hpp +++ b/src/mbgl/text/glyph_set.hpp @@ -13,7 +13,8 @@ class GlyphSet { float horizontalAlign, float verticalAlign, float justify, float spacing, const Point &translate) const; void lineWrap(Shaping &shaping, float lineHeight, float maxWidth, float horizontalAlign, - float verticalAlign, float justify, const Point &translate) const; + float verticalAlign, float justify, const Point &translate, + bool useBalancedIdeographicBreaking) const; private: std::map sdfs; diff --git a/src/mbgl/util/i18n.cpp b/src/mbgl/util/i18n.cpp new file mode 100644 index 00000000000..dbfa24c5cf4 --- /dev/null +++ b/src/mbgl/util/i18n.cpp @@ -0,0 +1,357 @@ +#include "i18n.hpp" + +namespace { + +/** Defines a function that returns true if a codepoint is in a named block. + @param name The name of the block in CamelCase. + @param first The first codepoint in the block, inclusive. + @param last The last codepoint in the block, inclusive. + */ +#define DEFINE_IS_IN_UNICODE_BLOCK(name, first, last) \ + inline bool isIn##name(uint32_t codepoint) { \ + return codepoint >= first && codepoint <= last; \ + } + +// The following table comes from . +// Keep it synchronized with . 
+ +// DEFINE_IS_IN_UNICODE_BLOCK(BasicLatin, 0x0000, 0x007F) +// DEFINE_IS_IN_UNICODE_BLOCK(Latin1Supplement, 0x0080, 0x00FF) +// DEFINE_IS_IN_UNICODE_BLOCK(LatinExtendedA, 0x0100, 0x017F) +// DEFINE_IS_IN_UNICODE_BLOCK(LatinExtendedB, 0x0180, 0x024F) +// DEFINE_IS_IN_UNICODE_BLOCK(IPAExtensions, 0x0250, 0x02AF) +// DEFINE_IS_IN_UNICODE_BLOCK(SpacingModifierLetters, 0x02B0, 0x02FF) +// DEFINE_IS_IN_UNICODE_BLOCK(CombiningDiacriticalMarks, 0x0300, 0x036F) +// DEFINE_IS_IN_UNICODE_BLOCK(GreekandCoptic, 0x0370, 0x03FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Cyrillic, 0x0400, 0x04FF) +// DEFINE_IS_IN_UNICODE_BLOCK(CyrillicSupplement, 0x0500, 0x052F) +// DEFINE_IS_IN_UNICODE_BLOCK(Armenian, 0x0530, 0x058F) +// DEFINE_IS_IN_UNICODE_BLOCK(Hebrew, 0x0590, 0x05FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Arabic, 0x0600, 0x06FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Syriac, 0x0700, 0x074F) +// DEFINE_IS_IN_UNICODE_BLOCK(ArabicSupplement, 0x0750, 0x077F) +// DEFINE_IS_IN_UNICODE_BLOCK(Thaana, 0x0780, 0x07BF) +// DEFINE_IS_IN_UNICODE_BLOCK(NKo, 0x07C0, 0x07FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Samaritan, 0x0800, 0x083F) +// DEFINE_IS_IN_UNICODE_BLOCK(Mandaic, 0x0840, 0x085F) +// DEFINE_IS_IN_UNICODE_BLOCK(ArabicExtendedA, 0x08A0, 0x08FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Devanagari, 0x0900, 0x097F) +// DEFINE_IS_IN_UNICODE_BLOCK(Bengali, 0x0980, 0x09FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Gurmukhi, 0x0A00, 0x0A7F) +// DEFINE_IS_IN_UNICODE_BLOCK(Gujarati, 0x0A80, 0x0AFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Oriya, 0x0B00, 0x0B7F) +// DEFINE_IS_IN_UNICODE_BLOCK(Tamil, 0x0B80, 0x0BFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Telugu, 0x0C00, 0x0C7F) +// DEFINE_IS_IN_UNICODE_BLOCK(Kannada, 0x0C80, 0x0CFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Malayalam, 0x0D00, 0x0D7F) +// DEFINE_IS_IN_UNICODE_BLOCK(Sinhala, 0x0D80, 0x0DFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Thai, 0x0E00, 0x0E7F) +// DEFINE_IS_IN_UNICODE_BLOCK(Lao, 0x0E80, 0x0EFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Tibetan, 0x0F00, 0x0FFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Myanmar, 0x1000, 0x109F) 
+// DEFINE_IS_IN_UNICODE_BLOCK(Georgian, 0x10A0, 0x10FF) +// DEFINE_IS_IN_UNICODE_BLOCK(HangulJamo, 0x1100, 0x11FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Ethiopic, 0x1200, 0x137F) +// DEFINE_IS_IN_UNICODE_BLOCK(EthiopicSupplement, 0x1380, 0x139F) +// DEFINE_IS_IN_UNICODE_BLOCK(Cherokee, 0x13A0, 0x13FF) +// DEFINE_IS_IN_UNICODE_BLOCK(UnifiedCanadianAboriginalSyllabics, 0x1400, 0x167F) +// DEFINE_IS_IN_UNICODE_BLOCK(Ogham, 0x1680, 0x169F) +// DEFINE_IS_IN_UNICODE_BLOCK(Runic, 0x16A0, 0x16FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Tagalog, 0x1700, 0x171F) +// DEFINE_IS_IN_UNICODE_BLOCK(Hanunoo, 0x1720, 0x173F) +// DEFINE_IS_IN_UNICODE_BLOCK(Buhid, 0x1740, 0x175F) +// DEFINE_IS_IN_UNICODE_BLOCK(Tagbanwa, 0x1760, 0x177F) +// DEFINE_IS_IN_UNICODE_BLOCK(Khmer, 0x1780, 0x17FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Mongolian, 0x1800, 0x18AF) +// DEFINE_IS_IN_UNICODE_BLOCK(UnifiedCanadianAboriginalSyllabicsExtended, 0x18B0, 0x18FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Limbu, 0x1900, 0x194F) +// DEFINE_IS_IN_UNICODE_BLOCK(TaiLe, 0x1950, 0x197F) +// DEFINE_IS_IN_UNICODE_BLOCK(NewTaiLue, 0x1980, 0x19DF) +// DEFINE_IS_IN_UNICODE_BLOCK(KhmerSymbols, 0x19E0, 0x19FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Buginese, 0x1A00, 0x1A1F) +// DEFINE_IS_IN_UNICODE_BLOCK(TaiTham, 0x1A20, 0x1AAF) +// DEFINE_IS_IN_UNICODE_BLOCK(CombiningDiacriticalMarksExtended, 0x1AB0, 0x1AFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Balinese, 0x1B00, 0x1B7F) +// DEFINE_IS_IN_UNICODE_BLOCK(Sundanese, 0x1B80, 0x1BBF) +// DEFINE_IS_IN_UNICODE_BLOCK(Batak, 0x1BC0, 0x1BFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Lepcha, 0x1C00, 0x1C4F) +// DEFINE_IS_IN_UNICODE_BLOCK(OlChiki, 0x1C50, 0x1C7F) +// DEFINE_IS_IN_UNICODE_BLOCK(CyrillicExtendedC, 0x1C80, 0x1C8F) +// DEFINE_IS_IN_UNICODE_BLOCK(SundaneseSupplement, 0x1CC0, 0x1CCF) +// DEFINE_IS_IN_UNICODE_BLOCK(VedicExtensions, 0x1CD0, 0x1CFF) +// DEFINE_IS_IN_UNICODE_BLOCK(PhoneticExtensions, 0x1D00, 0x1D7F) +// DEFINE_IS_IN_UNICODE_BLOCK(PhoneticExtensionsSupplement, 0x1D80, 0x1DBF) +// 
DEFINE_IS_IN_UNICODE_BLOCK(CombiningDiacriticalMarksSupplement, 0x1DC0, 0x1DFF) +// DEFINE_IS_IN_UNICODE_BLOCK(LatinExtendedAdditional, 0x1E00, 0x1EFF) +// DEFINE_IS_IN_UNICODE_BLOCK(GreekExtended, 0x1F00, 0x1FFF) +// DEFINE_IS_IN_UNICODE_BLOCK(GeneralPunctuation, 0x2000, 0x206F) +// DEFINE_IS_IN_UNICODE_BLOCK(SuperscriptsandSubscripts, 0x2070, 0x209F) +// DEFINE_IS_IN_UNICODE_BLOCK(CurrencySymbols, 0x20A0, 0x20CF) +// DEFINE_IS_IN_UNICODE_BLOCK(CombiningDiacriticalMarksforSymbols, 0x20D0, 0x20FF) +// DEFINE_IS_IN_UNICODE_BLOCK(LetterlikeSymbols, 0x2100, 0x214F) +// DEFINE_IS_IN_UNICODE_BLOCK(NumberForms, 0x2150, 0x218F) +// DEFINE_IS_IN_UNICODE_BLOCK(Arrows, 0x2190, 0x21FF) +// DEFINE_IS_IN_UNICODE_BLOCK(MathematicalOperators, 0x2200, 0x22FF) +// DEFINE_IS_IN_UNICODE_BLOCK(MiscellaneousTechnical, 0x2300, 0x23FF) +// DEFINE_IS_IN_UNICODE_BLOCK(ControlPictures, 0x2400, 0x243F) +// DEFINE_IS_IN_UNICODE_BLOCK(OpticalCharacterRecognition, 0x2440, 0x245F) +// DEFINE_IS_IN_UNICODE_BLOCK(EnclosedAlphanumerics, 0x2460, 0x24FF) +// DEFINE_IS_IN_UNICODE_BLOCK(BoxDrawing, 0x2500, 0x257F) +// DEFINE_IS_IN_UNICODE_BLOCK(BlockElements, 0x2580, 0x259F) +// DEFINE_IS_IN_UNICODE_BLOCK(GeometricShapes, 0x25A0, 0x25FF) +// DEFINE_IS_IN_UNICODE_BLOCK(MiscellaneousSymbols, 0x2600, 0x26FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Dingbats, 0x2700, 0x27BF) +// DEFINE_IS_IN_UNICODE_BLOCK(MiscellaneousMathematicalSymbolsA, 0x27C0, 0x27EF) +// DEFINE_IS_IN_UNICODE_BLOCK(SupplementalArrowsA, 0x27F0, 0x27FF) +// DEFINE_IS_IN_UNICODE_BLOCK(BraillePatterns, 0x2800, 0x28FF) +// DEFINE_IS_IN_UNICODE_BLOCK(SupplementalArrowsB, 0x2900, 0x297F) +// DEFINE_IS_IN_UNICODE_BLOCK(MiscellaneousMathematicalSymbolsB, 0x2980, 0x29FF) +// DEFINE_IS_IN_UNICODE_BLOCK(SupplementalMathematicalOperators, 0x2A00, 0x2AFF) +// DEFINE_IS_IN_UNICODE_BLOCK(MiscellaneousSymbolsandArrows, 0x2B00, 0x2BFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Glagolitic, 0x2C00, 0x2C5F) +// DEFINE_IS_IN_UNICODE_BLOCK(LatinExtendedC, 0x2C60, 0x2C7F) +// 
DEFINE_IS_IN_UNICODE_BLOCK(Coptic, 0x2C80, 0x2CFF) +// DEFINE_IS_IN_UNICODE_BLOCK(GeorgianSupplement, 0x2D00, 0x2D2F) +// DEFINE_IS_IN_UNICODE_BLOCK(Tifinagh, 0x2D30, 0x2D7F) +// DEFINE_IS_IN_UNICODE_BLOCK(EthiopicExtended, 0x2D80, 0x2DDF) +// DEFINE_IS_IN_UNICODE_BLOCK(CyrillicExtendedA, 0x2DE0, 0x2DFF) +// DEFINE_IS_IN_UNICODE_BLOCK(SupplementalPunctuation, 0x2E00, 0x2E7F) +DEFINE_IS_IN_UNICODE_BLOCK(CJKRadicalsSupplement, 0x2E80, 0x2EFF) +DEFINE_IS_IN_UNICODE_BLOCK(KangxiRadicals, 0x2F00, 0x2FDF) +DEFINE_IS_IN_UNICODE_BLOCK(IdeographicDescriptionCharacters, 0x2FF0, 0x2FFF) +DEFINE_IS_IN_UNICODE_BLOCK(CJKSymbolsandPunctuation, 0x3000, 0x303F) +DEFINE_IS_IN_UNICODE_BLOCK(Hiragana, 0x3040, 0x309F) +DEFINE_IS_IN_UNICODE_BLOCK(Katakana, 0x30A0, 0x30FF) +DEFINE_IS_IN_UNICODE_BLOCK(Bopomofo, 0x3100, 0x312F) +// DEFINE_IS_IN_UNICODE_BLOCK(HangulCompatibilityJamo, 0x3130, 0x318F) +// DEFINE_IS_IN_UNICODE_BLOCK(Kanbun, 0x3190, 0x319F) +DEFINE_IS_IN_UNICODE_BLOCK(BopomofoExtended, 0x31A0, 0x31BF) +DEFINE_IS_IN_UNICODE_BLOCK(CJKStrokes, 0x31C0, 0x31EF) +DEFINE_IS_IN_UNICODE_BLOCK(KatakanaPhoneticExtensions, 0x31F0, 0x31FF) +DEFINE_IS_IN_UNICODE_BLOCK(EnclosedCJKLettersandMonths, 0x3200, 0x32FF) +DEFINE_IS_IN_UNICODE_BLOCK(CJKCompatibility, 0x3300, 0x33FF) +DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographsExtensionA, 0x3400, 0x4DBF) +// DEFINE_IS_IN_UNICODE_BLOCK(YijingHexagramSymbols, 0x4DC0, 0x4DFF) +DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographs, 0x4E00, 0x9FFF) +DEFINE_IS_IN_UNICODE_BLOCK(YiSyllables, 0xA000, 0xA48F) +DEFINE_IS_IN_UNICODE_BLOCK(YiRadicals, 0xA490, 0xA4CF) +// DEFINE_IS_IN_UNICODE_BLOCK(Lisu, 0xA4D0, 0xA4FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Vai, 0xA500, 0xA63F) +// DEFINE_IS_IN_UNICODE_BLOCK(CyrillicExtendedB, 0xA640, 0xA69F) +// DEFINE_IS_IN_UNICODE_BLOCK(Bamum, 0xA6A0, 0xA6FF) +// DEFINE_IS_IN_UNICODE_BLOCK(ModifierToneLetters, 0xA700, 0xA71F) +// DEFINE_IS_IN_UNICODE_BLOCK(LatinExtendedD, 0xA720, 0xA7FF) +// DEFINE_IS_IN_UNICODE_BLOCK(SylotiNagri, 0xA800, 
0xA82F) +// DEFINE_IS_IN_UNICODE_BLOCK(CommonIndicNumberForms, 0xA830, 0xA83F) +// DEFINE_IS_IN_UNICODE_BLOCK(Phagspa, 0xA840, 0xA87F) +// DEFINE_IS_IN_UNICODE_BLOCK(Saurashtra, 0xA880, 0xA8DF) +// DEFINE_IS_IN_UNICODE_BLOCK(DevanagariExtended, 0xA8E0, 0xA8FF) +// DEFINE_IS_IN_UNICODE_BLOCK(KayahLi, 0xA900, 0xA92F) +// DEFINE_IS_IN_UNICODE_BLOCK(Rejang, 0xA930, 0xA95F) +// DEFINE_IS_IN_UNICODE_BLOCK(HangulJamoExtendedA, 0xA960, 0xA97F) +// DEFINE_IS_IN_UNICODE_BLOCK(Javanese, 0xA980, 0xA9DF) +// DEFINE_IS_IN_UNICODE_BLOCK(MyanmarExtendedB, 0xA9E0, 0xA9FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Cham, 0xAA00, 0xAA5F) +// DEFINE_IS_IN_UNICODE_BLOCK(MyanmarExtendedA, 0xAA60, 0xAA7F) +// DEFINE_IS_IN_UNICODE_BLOCK(TaiViet, 0xAA80, 0xAADF) +// DEFINE_IS_IN_UNICODE_BLOCK(MeeteiMayekExtensions, 0xAAE0, 0xAAFF) +// DEFINE_IS_IN_UNICODE_BLOCK(EthiopicExtendedA, 0xAB00, 0xAB2F) +// DEFINE_IS_IN_UNICODE_BLOCK(LatinExtendedE, 0xAB30, 0xAB6F) +// DEFINE_IS_IN_UNICODE_BLOCK(CherokeeSupplement, 0xAB70, 0xABBF) +// DEFINE_IS_IN_UNICODE_BLOCK(MeeteiMayek, 0xABC0, 0xABFF) +// DEFINE_IS_IN_UNICODE_BLOCK(HangulSyllables, 0xAC00, 0xD7AF) +// DEFINE_IS_IN_UNICODE_BLOCK(HangulJamoExtendedB, 0xD7B0, 0xD7FF) +// DEFINE_IS_IN_UNICODE_BLOCK(HighSurrogates, 0xD800, 0xDB7F) +// DEFINE_IS_IN_UNICODE_BLOCK(HighPrivateUseSurrogates, 0xDB80, 0xDBFF) +// DEFINE_IS_IN_UNICODE_BLOCK(LowSurrogates, 0xDC00, 0xDFFF) +// DEFINE_IS_IN_UNICODE_BLOCK(PrivateUseArea, 0xE000, 0xF8FF) +DEFINE_IS_IN_UNICODE_BLOCK(CJKCompatibilityIdeographs, 0xF900, 0xFAFF) +// DEFINE_IS_IN_UNICODE_BLOCK(AlphabeticPresentationForms, 0xFB00, 0xFB4F) +// DEFINE_IS_IN_UNICODE_BLOCK(ArabicPresentationFormsA, 0xFB50, 0xFDFF) +// DEFINE_IS_IN_UNICODE_BLOCK(VariationSelectors, 0xFE00, 0xFE0F) +DEFINE_IS_IN_UNICODE_BLOCK(VerticalForms, 0xFE10, 0xFE1F) +// DEFINE_IS_IN_UNICODE_BLOCK(CombiningHalfMarks, 0xFE20, 0xFE2F) +DEFINE_IS_IN_UNICODE_BLOCK(CJKCompatibilityForms, 0xFE30, 0xFE4F) +// DEFINE_IS_IN_UNICODE_BLOCK(SmallFormVariants, 0xFE50, 
0xFE6F) +// DEFINE_IS_IN_UNICODE_BLOCK(ArabicPresentationFormsB, 0xFE70, 0xFEFF) +DEFINE_IS_IN_UNICODE_BLOCK(HalfwidthandFullwidthForms, 0xFF00, 0xFFEF) +// DEFINE_IS_IN_UNICODE_BLOCK(Specials, 0xFFF0, 0xFFFF) +// DEFINE_IS_IN_UNICODE_BLOCK(LinearBSyllabary, 0x10000, 0x1007F) +// DEFINE_IS_IN_UNICODE_BLOCK(LinearBIdeograms, 0x10080, 0x100FF) +// DEFINE_IS_IN_UNICODE_BLOCK(AegeanNumbers, 0x10100, 0x1013F) +// DEFINE_IS_IN_UNICODE_BLOCK(AncientGreekNumbers, 0x10140, 0x1018F) +// DEFINE_IS_IN_UNICODE_BLOCK(AncientSymbols, 0x10190, 0x101CF) +// DEFINE_IS_IN_UNICODE_BLOCK(PhaistosDisc, 0x101D0, 0x101FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Lycian, 0x10280, 0x1029F) +// DEFINE_IS_IN_UNICODE_BLOCK(Carian, 0x102A0, 0x102DF) +// DEFINE_IS_IN_UNICODE_BLOCK(CopticEpactNumbers, 0x102E0, 0x102FF) +// DEFINE_IS_IN_UNICODE_BLOCK(OldItalic, 0x10300, 0x1032F) +// DEFINE_IS_IN_UNICODE_BLOCK(Gothic, 0x10330, 0x1034F) +// DEFINE_IS_IN_UNICODE_BLOCK(OldPermic, 0x10350, 0x1037F) +// DEFINE_IS_IN_UNICODE_BLOCK(Ugaritic, 0x10380, 0x1039F) +// DEFINE_IS_IN_UNICODE_BLOCK(OldPersian, 0x103A0, 0x103DF) +// DEFINE_IS_IN_UNICODE_BLOCK(Deseret, 0x10400, 0x1044F) +// DEFINE_IS_IN_UNICODE_BLOCK(Shavian, 0x10450, 0x1047F) +// DEFINE_IS_IN_UNICODE_BLOCK(Osmanya, 0x10480, 0x104AF) +// DEFINE_IS_IN_UNICODE_BLOCK(Osage, 0x104B0, 0x104FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Elbasan, 0x10500, 0x1052F) +// DEFINE_IS_IN_UNICODE_BLOCK(CaucasianAlbanian, 0x10530, 0x1056F) +// DEFINE_IS_IN_UNICODE_BLOCK(LinearA, 0x10600, 0x1077F) +// DEFINE_IS_IN_UNICODE_BLOCK(CypriotSyllabary, 0x10800, 0x1083F) +// DEFINE_IS_IN_UNICODE_BLOCK(ImperialAramaic, 0x10840, 0x1085F) +// DEFINE_IS_IN_UNICODE_BLOCK(Palmyrene, 0x10860, 0x1087F) +// DEFINE_IS_IN_UNICODE_BLOCK(Nabataean, 0x10880, 0x108AF) +// DEFINE_IS_IN_UNICODE_BLOCK(Hatran, 0x108E0, 0x108FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Phoenician, 0x10900, 0x1091F) +// DEFINE_IS_IN_UNICODE_BLOCK(Lydian, 0x10920, 0x1093F) +// DEFINE_IS_IN_UNICODE_BLOCK(MeroiticHieroglyphs, 0x10980, 0x1099F) 
+// DEFINE_IS_IN_UNICODE_BLOCK(MeroiticCursive, 0x109A0, 0x109FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Kharoshthi, 0x10A00, 0x10A5F) +// DEFINE_IS_IN_UNICODE_BLOCK(OldSouthArabian, 0x10A60, 0x10A7F) +// DEFINE_IS_IN_UNICODE_BLOCK(OldNorthArabian, 0x10A80, 0x10A9F) +// DEFINE_IS_IN_UNICODE_BLOCK(Manichaean, 0x10AC0, 0x10AFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Avestan, 0x10B00, 0x10B3F) +// DEFINE_IS_IN_UNICODE_BLOCK(InscriptionalParthian, 0x10B40, 0x10B5F) +// DEFINE_IS_IN_UNICODE_BLOCK(InscriptionalPahlavi, 0x10B60, 0x10B7F) +// DEFINE_IS_IN_UNICODE_BLOCK(PsalterPahlavi, 0x10B80, 0x10BAF) +// DEFINE_IS_IN_UNICODE_BLOCK(OldTurkic, 0x10C00, 0x10C4F) +// DEFINE_IS_IN_UNICODE_BLOCK(OldHungarian, 0x10C80, 0x10CFF) +// DEFINE_IS_IN_UNICODE_BLOCK(RumiNumeralSymbols, 0x10E60, 0x10E7F) +// DEFINE_IS_IN_UNICODE_BLOCK(Brahmi, 0x11000, 0x1107F) +// DEFINE_IS_IN_UNICODE_BLOCK(Kaithi, 0x11080, 0x110CF) +// DEFINE_IS_IN_UNICODE_BLOCK(SoraSompeng, 0x110D0, 0x110FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Chakma, 0x11100, 0x1114F) +// DEFINE_IS_IN_UNICODE_BLOCK(Mahajani, 0x11150, 0x1117F) +// DEFINE_IS_IN_UNICODE_BLOCK(Sharada, 0x11180, 0x111DF) +// DEFINE_IS_IN_UNICODE_BLOCK(SinhalaArchaicNumbers, 0x111E0, 0x111FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Khojki, 0x11200, 0x1124F) +// DEFINE_IS_IN_UNICODE_BLOCK(Multani, 0x11280, 0x112AF) +// DEFINE_IS_IN_UNICODE_BLOCK(Khudawadi, 0x112B0, 0x112FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Grantha, 0x11300, 0x1137F) +// DEFINE_IS_IN_UNICODE_BLOCK(Newa, 0x11400, 0x1147F) +// DEFINE_IS_IN_UNICODE_BLOCK(Tirhuta, 0x11480, 0x114DF) +// DEFINE_IS_IN_UNICODE_BLOCK(Siddham, 0x11580, 0x115FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Modi, 0x11600, 0x1165F) +// DEFINE_IS_IN_UNICODE_BLOCK(MongolianSupplement, 0x11660, 0x1167F) +// DEFINE_IS_IN_UNICODE_BLOCK(Takri, 0x11680, 0x116CF) +// DEFINE_IS_IN_UNICODE_BLOCK(Ahom, 0x11700, 0x1173F) +// DEFINE_IS_IN_UNICODE_BLOCK(WarangCiti, 0x118A0, 0x118FF) +// DEFINE_IS_IN_UNICODE_BLOCK(PauCinHau, 0x11AC0, 0x11AFF) +// 
DEFINE_IS_IN_UNICODE_BLOCK(Bhaiksuki, 0x11C00, 0x11C6F) +// DEFINE_IS_IN_UNICODE_BLOCK(Marchen, 0x11C70, 0x11CBF) +// DEFINE_IS_IN_UNICODE_BLOCK(Cuneiform, 0x12000, 0x123FF) +// DEFINE_IS_IN_UNICODE_BLOCK(CuneiformNumbersandPunctuation, 0x12400, 0x1247F) +// DEFINE_IS_IN_UNICODE_BLOCK(EarlyDynasticCuneiform, 0x12480, 0x1254F) +// DEFINE_IS_IN_UNICODE_BLOCK(EgyptianHieroglyphs, 0x13000, 0x1342F) +// DEFINE_IS_IN_UNICODE_BLOCK(AnatolianHieroglyphs, 0x14400, 0x1467F) +// DEFINE_IS_IN_UNICODE_BLOCK(BamumSupplement, 0x16800, 0x16A3F) +// DEFINE_IS_IN_UNICODE_BLOCK(Mro, 0x16A40, 0x16A6F) +// DEFINE_IS_IN_UNICODE_BLOCK(BassaVah, 0x16AD0, 0x16AFF) +// DEFINE_IS_IN_UNICODE_BLOCK(PahawhHmong, 0x16B00, 0x16B8F) +// DEFINE_IS_IN_UNICODE_BLOCK(Miao, 0x16F00, 0x16F9F) +// DEFINE_IS_IN_UNICODE_BLOCK(IdeographicSymbolsandPunctuation, 0x16FE0, 0x16FFF) +// DEFINE_IS_IN_UNICODE_BLOCK(Tangut, 0x17000, 0x187FF) +// DEFINE_IS_IN_UNICODE_BLOCK(TangutComponents, 0x18800, 0x18AFF) +// DEFINE_IS_IN_UNICODE_BLOCK(KanaSupplement, 0x1B000, 0x1B0FF) +// DEFINE_IS_IN_UNICODE_BLOCK(Duployan, 0x1BC00, 0x1BC9F) +// DEFINE_IS_IN_UNICODE_BLOCK(ShorthandFormatControls, 0x1BCA0, 0x1BCAF) +// DEFINE_IS_IN_UNICODE_BLOCK(ByzantineMusicalSymbols, 0x1D000, 0x1D0FF) +// DEFINE_IS_IN_UNICODE_BLOCK(MusicalSymbols, 0x1D100, 0x1D1FF) +// DEFINE_IS_IN_UNICODE_BLOCK(AncientGreekMusicalNotation, 0x1D200, 0x1D24F) +// DEFINE_IS_IN_UNICODE_BLOCK(TaiXuanJingSymbols, 0x1D300, 0x1D35F) +// DEFINE_IS_IN_UNICODE_BLOCK(CountingRodNumerals, 0x1D360, 0x1D37F) +// DEFINE_IS_IN_UNICODE_BLOCK(MathematicalAlphanumericSymbols, 0x1D400, 0x1D7FF) +// DEFINE_IS_IN_UNICODE_BLOCK(SuttonSignWriting, 0x1D800, 0x1DAAF) +// DEFINE_IS_IN_UNICODE_BLOCK(GlagoliticSupplement, 0x1E000, 0x1E02F) +// DEFINE_IS_IN_UNICODE_BLOCK(MendeKikakui, 0x1E800, 0x1E8DF) +// DEFINE_IS_IN_UNICODE_BLOCK(Adlam, 0x1E900, 0x1E95F) +// DEFINE_IS_IN_UNICODE_BLOCK(ArabicMathematicalAlphabeticSymbols, 0x1EE00, 0x1EEFF) +// DEFINE_IS_IN_UNICODE_BLOCK(MahjongTiles, 
0x1F000, 0x1F02F)
+// DEFINE_IS_IN_UNICODE_BLOCK(DominoTiles, 0x1F030, 0x1F09F)
+// DEFINE_IS_IN_UNICODE_BLOCK(PlayingCards, 0x1F0A0, 0x1F0FF)
+// DEFINE_IS_IN_UNICODE_BLOCK(EnclosedAlphanumericSupplement, 0x1F100, 0x1F1FF)
+// DEFINE_IS_IN_UNICODE_BLOCK(EnclosedIdeographicSupplement, 0x1F200, 0x1F2FF)
+// DEFINE_IS_IN_UNICODE_BLOCK(MiscellaneousSymbolsandPictographs, 0x1F300, 0x1F5FF)
+// DEFINE_IS_IN_UNICODE_BLOCK(Emoticons, 0x1F600, 0x1F64F)
+// DEFINE_IS_IN_UNICODE_BLOCK(OrnamentalDingbats, 0x1F650, 0x1F67F)
+// DEFINE_IS_IN_UNICODE_BLOCK(TransportandMapSymbols, 0x1F680, 0x1F6FF)
+// DEFINE_IS_IN_UNICODE_BLOCK(AlchemicalSymbols, 0x1F700, 0x1F77F)
+// DEFINE_IS_IN_UNICODE_BLOCK(GeometricShapesExtended, 0x1F780, 0x1F7FF)
+// DEFINE_IS_IN_UNICODE_BLOCK(SupplementalArrowsC, 0x1F800, 0x1F8FF)
+// DEFINE_IS_IN_UNICODE_BLOCK(SupplementalSymbolsandPictographs, 0x1F900, 0x1F9FF)
+// DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographsExtensionB, 0x20000, 0x2A6DF)
+// DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographsExtensionC, 0x2A700, 0x2B73F)
+// DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographsExtensionD, 0x2B740, 0x2B81F)
+// DEFINE_IS_IN_UNICODE_BLOCK(CJKUnifiedIdeographsExtensionE, 0x2B820, 0x2CEAF)
+// DEFINE_IS_IN_UNICODE_BLOCK(CJKCompatibilityIdeographsSupplement, 0x2F800, 0x2FA1F)
+// DEFINE_IS_IN_UNICODE_BLOCK(Tags, 0xE0000, 0xE007F)
+// DEFINE_IS_IN_UNICODE_BLOCK(VariationSelectorsSupplement, 0xE0100, 0xE01EF)
+// DEFINE_IS_IN_UNICODE_BLOCK(SupplementaryPrivateUseAreaA, 0xF0000, 0xFFFFF)
+// DEFINE_IS_IN_UNICODE_BLOCK(SupplementaryPrivateUseAreaB, 0x100000, 0x10FFFF)
+}
+
+namespace mbgl {
+namespace util {
+namespace i18n {
+
+bool isVisible(uint32_t chr) { // NOTE: despite the name, true only for newline, space, and zero-width space.
+    return (chr == 0x0a /* newline */
+            || chr == 0x20 /* space */
+            || chr == 0x200b /* zero-width space */);
+}
+
+bool allowsWordBreaking(uint32_t chr) { // True only for the whitespace and punctuation codepoints listed below.
+    return (chr == 0x0a /* newline */
+            || chr == 0x20 /* space */
+            || chr == 0x26 /* ampersand */
+            || chr == 0x2b /* plus sign */
+            || chr == 0x2d /* hyphen-minus */
+            || chr == 0x2f /* solidus */
+            || chr == 0xad /* soft hyphen */
+            || chr == 0xb7 /* middle dot */
+            || chr == 0x200b /* zero-width space */
+            || chr == 0x2010 /* hyphen */
+            || chr == 0x2013 /* en dash */);
+}
+
+bool allowsIdeographicBreaking(const std::u32string& string) { // True only if every codepoint passes; an empty string yields true.
+    for (uint32_t chr : string) {
+        if (!allowsIdeographicBreaking(chr)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+bool allowsIdeographicBreaking(uint32_t chr) {
+    // Return early for characters below U+2E80 (start of CJK Radicals Supplement), which lie outside all ranges tested here.
+    if (chr < 0x2E80)
+        return false;
+
+    return (isInBopomofo(chr) || isInBopomofoExtended(chr) || isInCJKCompatibility(chr) ||
+            isInCJKCompatibilityForms(chr) || isInCJKCompatibilityIdeographs(chr) ||
+            isInCJKRadicalsSupplement(chr) || isInCJKStrokes(chr) ||
+            isInCJKSymbolsandPunctuation(chr) || isInCJKUnifiedIdeographs(chr) ||
+            isInCJKUnifiedIdeographsExtensionA(chr) || isInEnclosedCJKLettersandMonths(chr) ||
+            isInHalfwidthandFullwidthForms(chr) || isInHiragana(chr) ||
+            isInIdeographicDescriptionCharacters(chr) || isInKangxiRadicals(chr) ||
+            isInKatakana(chr) || isInKatakanaPhoneticExtensions(chr) || isInVerticalForms(chr) ||
+            isInYiRadicals(chr) || isInYiSyllables(chr));
+
+    // The following blocks also allow ideographic breaking; however, for other
+    // reasons, Mapbox GL lacks support for codepoints beyond U+FFFF, so this check stays disabled.
+    // https://github.com/mapbox/mapbox-gl/issues/29
+    // return (isInTangut(chr)
+    //         || isInTangutComponents(chr)
+    //         || isInIdeographicSymbolsandPunctuation(chr)
+    //         || isInEnclosedIdeographicSupplement(chr)
+    //         || isInCJKUnifiedIdeographsExtensionB(chr)
+    //         || isInCJKUnifiedIdeographsExtensionC(chr)
+    //         || isInCJKUnifiedIdeographsExtensionD(chr)
+    //         || isInCJKUnifiedIdeographsExtensionE(chr)
+    //         || isInCJKCompatibilityIdeographsSupplement(chr));
+}
+
+} // namespace i18n
+} // namespace util
+} // namespace mbgl
diff --git a/src/mbgl/util/i18n.hpp b/src/mbgl/util/i18n.hpp
new file mode 100644
index 00000000000..fe324f53620
--- /dev/null
+++ b/src/mbgl/util/i18n.hpp
@@ -0,0 +1,27 @@
+#pragma once
+
+#include // NOTE(review): the header name is missing here (lost in extraction) — presumably <string>, since std::u32string is used below; confirm.
+
+namespace mbgl {
+namespace util {
+namespace i18n {
+
+/** Returns whether a character is a newline, space, or zero-width space. NOTE: despite the name, this is true for whitespace, not for visible characters. */
+bool isVisible(uint32_t chr);
+
+/** Returns whether a line break can be inserted after the character indicated
+    by the given Unicode codepoint due to word breaking. */
+bool allowsWordBreaking(uint32_t chr);
+
+/** Returns whether every character in the given string allows ideographic
+    breaking, i.e. a line break can be inserted after any of them. If false,
+    line breaking should occur on word boundaries instead. */
+bool allowsIdeographicBreaking(const std::u32string& string);
+
+/** Returns whether a line break can be inserted after the character indicated
+    by the given Unicode codepoint due to ideographic breaking. */
+bool allowsIdeographicBreaking(uint32_t chr);
+
+} // namespace i18n
+} // namespace util
+} // namespace mbgl