diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 7729cdcdbd4..4722dc0dfc4 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -92,47 +92,35 @@ CharToColumnMapper::CharToColumnMapper(const wchar_t* chars, const uint16_t* cha // If given a position (`offset`) inside the ROW's text, this function will return the corresponding column. // This function in particular returns the glyph's first column. -til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t offset) noexcept +til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t targetOffset) noexcept { - offset = clamp(offset, 0, _lastCharOffset); + targetOffset = clamp(targetOffset, 0, _lastCharOffset); + + // This code needs to fulfill two conditions on top of the obvious (a forward/backward search): + // A: We never want to stop on a column that is marked with CharOffsetsTrailer (= "GetLeadingColumn"). + // B: With these parameters we always want to stop at currentOffset=4: + // _charOffsets={4, 6} + // currentOffset=4 *OR* 6 + // targetOffset=5 + // This is because we're being asked for a "LeadingColumn", while the caller gave us the offset of a + // trailing surrogate pair or similar. Returning the column of the leading half is the correct choice. auto col = _currentColumn; - const auto currentOffset = _charOffsets[col] & CharOffsetsMask; + auto currentOffset = _charOffsets[col]; - // Goal: Move the _currentColumn cursor to a cell which contains the given target offset. - // Depending on where the target offset is we have to either search forward or backward. - if (offset < currentOffset) + // A plain forward-search until we find our targetOffset. + // This loop may iterate too far and thus violate our example in condition B, however... + while (targetOffset > (currentOffset & CharOffsetsMask)) { - // Backward search. - // Goal: Find the first preceding column where the offset is <= the target offset. This results in the first - // cell that contains our target offset, even if that offset is in the middle of a long grapheme. - // - // We abuse the fact that the trailing half of wide glyphs is marked with CharOffsetsTrailer to our advantage. - // Since they're >0x8000, the `offset < _charOffsets[col]` check will always be true and ensure we iterate over them. - // - // Since _charOffsets cannot contain negative values and because offset has been - // clamped to be positive we naturally exit when reaching the first column. - for (; offset < _charOffsets[col - 1]; --col) - { - } + currentOffset = _charOffsets[++col]; } - else if (offset > currentOffset) + // This backward-search is not just a counter-part to the above, but simultaneously also handles conditions A and B. + // It abuses the fact that columns marked with CharOffsetsTrailer are >0x8000 and targetOffset is always <0x8000. + // This means we skip all "trailer" columns when iterating backwards, and only stop on a non-trailer (= condition A). + // Condition B is fixed simply because we iterate backwards after the forward-search (in that exact order). + while (targetOffset < currentOffset) { - // Forward search. - // Goal: Find the first subsequent column where the offset is > the target offset. - // We stop 1 column before that however so that the next loop works correctly. - // It's the inverse of the loop above. - // - // Since offset has been clamped to be at most 1 less than the maximum - // _charOffsets value the loop naturally exits before hitting the end. - for (; offset >= (_charOffsets[col + 1] & CharOffsetsMask); ++col) - { - } - // Now that we found the cell that definitely includes this char offset, - // we have to iterate back to the cell's starting column. - for (; WI_IsFlagSet(_charOffsets[col], CharOffsetsTrailer); --col) - { - } + currentOffset = _charOffsets[--col]; } _currentColumn = col; diff --git a/src/buffer/out/Row.hpp b/src/buffer/out/Row.hpp index af8088c3ccd..197343df6d8 100644 --- a/src/buffer/out/Row.hpp +++ b/src/buffer/out/Row.hpp @@ -71,7 +71,7 @@ struct CharToColumnMapper { CharToColumnMapper(const wchar_t* chars, const uint16_t* charOffsets, ptrdiff_t lastCharOffset, til::CoordType currentColumn) noexcept; - til::CoordType GetLeadingColumnAt(ptrdiff_t offset) noexcept; + til::CoordType GetLeadingColumnAt(ptrdiff_t targetOffset) noexcept; til::CoordType GetTrailingColumnAt(ptrdiff_t offset) noexcept; til::CoordType GetLeadingColumnAt(const wchar_t* str) noexcept; til::CoordType GetTrailingColumnAt(const wchar_t* str) noexcept; diff --git a/src/buffer/out/ut_textbuffer/TextBuffer.Unit.Tests.vcxproj b/src/buffer/out/ut_textbuffer/TextBuffer.Unit.Tests.vcxproj index ad3b48c7cb5..c52d012baec 100644 --- a/src/buffer/out/ut_textbuffer/TextBuffer.Unit.Tests.vcxproj +++ b/src/buffer/out/ut_textbuffer/TextBuffer.Unit.Tests.vcxproj @@ -14,6 +14,7 @@ + Create @@ -41,4 +42,4 @@ - \ No newline at end of file + diff --git a/src/buffer/out/ut_textbuffer/UTextAdapterTests.cpp b/src/buffer/out/ut_textbuffer/UTextAdapterTests.cpp new file mode 100644 index 00000000000..ee879f4f677 --- /dev/null +++ b/src/buffer/out/ut_textbuffer/UTextAdapterTests.cpp @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#include "precomp.h" + +#include "WexTestClass.h" +#include "../textBuffer.hpp" +#include "../../renderer/inc/DummyRenderer.hpp" + +template<> +class WEX::TestExecution::VerifyOutputTraits> +{ +public: + static WEX::Common::NoThrowString ToString(const std::vector& vec) + { + WEX::Common::NoThrowString str; + str.Append(L"{ "); + for (size_t i = 0; i < vec.size(); ++i) + { + const auto& s = vec[i]; + if (i != 0) + { + str.Append(L", "); + } + str.AppendFormat(L"{(%d, %d), (%d, %d)}", s.start.x, s.start.y, s.end.x, s.end.y); + } + str.Append(L" }"); + return str; + } +}; + +class UTextAdapterTests +{ + TEST_CLASS(UTextAdapterTests); + + TEST_METHOD(Unicode) + { + DummyRenderer renderer; + TextBuffer buffer{ til::size{ 24, 1 }, TextAttribute{}, 0, false, renderer }; + + RowWriteState state{ + .text = L"abc 𝒢𝒷𝒸 abc ネコけゃん", + }; + buffer.Write(0, TextAttribute{}, state); + VERIFY_IS_TRUE(state.text.empty()); + + static constexpr auto s = [](til::CoordType beg, til::CoordType end) -> til::point_span { + return { { beg, 0 }, { end, 0 } }; + }; + + auto expected = std::vector{ s(0, 2), s(8, 10) }; + auto actual = buffer.SearchText(L"abc", false); + VERIFY_ARE_EQUAL(expected, actual); + + expected = std::vector{ s(5, 5) }; + actual = buffer.SearchText(L"𝒷", false); + VERIFY_ARE_EQUAL(expected, actual); + + expected = std::vector{ s(12, 15) }; + actual = buffer.SearchText(L"ネコ", false); + VERIFY_ARE_EQUAL(expected, actual); + } +}; diff --git a/src/buffer/out/ut_textbuffer/sources b/src/buffer/out/ut_textbuffer/sources index 570467fce63..842246aaee6 100644 --- a/src/buffer/out/ut_textbuffer/sources +++ b/src/buffer/out/ut_textbuffer/sources @@ -17,6 +17,7 @@ SOURCES = \ ReflowTests.cpp \ TextColorTests.cpp \ TextAttributeTests.cpp \ + UTextAdapterTests.cpp \ DefaultResource.rc \ TARGETLIBS = \ diff --git a/src/inc/til/point.h b/src/inc/til/point.h index 332b6fd8e0b..4da8d5c6ddb 100644 --- a/src/inc/til/point.h +++ b/src/inc/til/point.h @@ -276,6 +276,18 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" { til::point start; til::point end; + + constexpr bool operator==(const point_span& rhs) const noexcept + { + // `__builtin_memcmp` isn't an official standard, but it's the + // only way at the time of writing to get a constexpr `memcmp`. + return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0; + } + + constexpr bool operator!=(const point_span& rhs) const noexcept + { + return __builtin_memcmp(this, &rhs, sizeof(rhs)) != 0; + } }; }