From 462b8d228d1f8add0467b7c80c1a88802abdb669 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 21 Aug 2023 14:21:02 +0200 Subject: [PATCH 01/22] Use ICU for text search --- .github/actions/spelling/allow/apis.txt | 1 + .github/actions/spelling/expect/expect.txt | 52 +-- src/buffer/out/Row.cpp | 207 +++++++++--- src/buffer/out/Row.hpp | 44 ++- src/buffer/out/UTextAdapter.cpp | 262 +++++++++++++++ src/buffer/out/UTextAdapter.h | 13 + src/buffer/out/lib/bufferout.vcxproj | 2 + src/buffer/out/search.cpp | 332 +++--------------- src/buffer/out/search.h | 62 +--- src/buffer/out/textBuffer.cpp | 336 ++++++++----------- src/buffer/out/textBuffer.hpp | 26 +- src/cascadia/TerminalControl/ControlCore.cpp | 37 +- src/cascadia/TerminalControl/ControlCore.h | 10 +- src/cascadia/TerminalControl/ControlCore.idl | 1 + src/cascadia/TerminalControl/TermControl.cpp | 1 + src/cascadia/TerminalCore/Terminal.cpp | 12 +- src/cascadia/TerminalCore/Terminal.hpp | 1 - src/cascadia/TerminalCore/TerminalApi.cpp | 2 - src/host/_stream.cpp | 4 +- src/host/selectionInput.cpp | 7 +- src/host/telemetry.cpp | 38 --- src/host/telemetry.hpp | 6 - src/inc/til/at.h | 1 + src/interactivity/win32/find.cpp | 68 ++-- src/terminal/adapter/adaptDispatch.cpp | 8 +- src/types/UiaTextRangeBase.cpp | 70 ++-- src/types/UiaTracing.cpp | 20 +- src/types/UiaTracing.h | 16 +- 28 files changed, 823 insertions(+), 816 deletions(-) create mode 100644 src/buffer/out/UTextAdapter.cpp create mode 100644 src/buffer/out/UTextAdapter.h diff --git a/.github/actions/spelling/allow/apis.txt b/.github/actions/spelling/allow/apis.txt index 08b1c6bcadb..1f85a1afab2 100644 --- a/.github/actions/spelling/allow/apis.txt +++ b/.github/actions/spelling/allow/apis.txt @@ -52,6 +52,7 @@ futex GETDESKWALLPAPER GETHIGHCONTRAST GETMOUSEHOVERTIME +GETTEXTLENGTH Hashtable HIGHCONTRASTON HIGHCONTRASTW diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 626777f6bdb..3a69241e2fd 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -7,7 +7,6 @@ ABCF abgr abi ABORTIFHUNG -ACCESSTOKEN acidev ACIOSS ACover @@ -117,10 +116,8 @@ binplace binplaced bitcoin bitcrazed -bitflag bitmask BITOPERATION -bitsets BKCOLOR BKGND Bksp @@ -149,12 +146,12 @@ bufferout buffersize buflen buildtransitive -BUILDURI burriter BValue bytebuffer cac cacafire +CALLCONV capslock CARETBLINKINGENABLED CARRIAGERETURN @@ -198,7 +195,6 @@ CHT Cic cielab Cielab -Clcompile CLE cleartype CLICKACTIVE @@ -229,7 +225,6 @@ codepage codepath codepoints coinit -COLLECTIONURI colorizing COLORMATRIX COLORREFs @@ -307,7 +302,6 @@ coordnew COPYCOLOR CORESYSTEM cotaskmem -countof CPG cpinfo CPINFOEX @@ -315,7 +309,6 @@ CPLINFO cplusplus CPPCORECHECK cppcorecheckrules -cpprest cpprestsdk cppwinrt CProc @@ -382,7 +375,6 @@ dai DATABLOCK DBatch dbcs -DBCSCHAR DBCSFONT dbg DBGALL @@ -504,7 +496,6 @@ devicecode Dext DFactory DFF -dhandler dialogbox directio DIRECTX @@ -522,7 +513,6 @@ dllmain DLLVERSIONINFO DLOAD DLOOK -dmp DONTCARE doskey dotnet @@ -600,7 +590,6 @@ eplace EPres EQU ERASEBKGND -etcoreapp ETW EUDC EVENTID @@ -642,7 +631,6 @@ FGs FILEDESCRIPTION FILESUBTYPE FILESYSPATH -fileurl FILEW FILLATTR FILLCONSOLEOUTPUT @@ -824,7 +812,6 @@ HIWORD HKCU hkey hkl -HKLM hlocal hlsl HMB @@ -832,7 +819,6 @@ HMK hmod hmodule hmon -homeglyphs homoglyph HORZ hostable @@ -941,7 +927,6 @@ IUI IUnknown ivalid IWIC -IXMP IXP jconcpp JOBOBJECT @@ -965,7 +950,6 @@ kernelbasestaging KEYBDINPUT keychord keydown -keyevent KEYFIRST KEYLAST Keymapping @@ -1012,7 +996,6 @@ LINEWRAP LINKERRCAP LINKERROR linputfile -listproperties listptr listptrsize lld @@ -1131,7 +1114,6 @@ MIIM milli mincore mindbogglingly -minimizeall minkernel MINMAXINFO minwin @@ -1318,7 +1300,6 @@ onecoreuuid ONECOREWINDOWS onehalf oneseq -ONLCR openbash opencode opencon @@ -1328,13 +1309,6 @@ openps openvt ORIGINALFILENAME osc -OSCBG -OSCCT -OSCFG -OSCRCC -OSCSCB -OSCSCC -OSCWT OSDEPENDSROOT OSG OSGENG @@ -1453,7 +1427,6 @@ PPEB ppf ppguid ppidl -pplx PPROC ppropvar ppsi @@ -1468,7 +1441,6 @@ prealigned prect prefast prefs -preinstalled prepopulated presorted PREVENTPINNING @@ -1481,7 +1453,6 @@ prioritization processenv processhost PROCESSINFOCLASS -procs PROPERTYID PROPERTYKEY PROPERTYVAL @@ -1496,7 +1467,6 @@ propvariant propvarutil psa PSECURITY -pseudocode pseudoconsole pseudoterminal psh @@ -1776,7 +1746,6 @@ SND SOLIDBOX Solutiondir somefile -SOURCEBRANCH sourced spammy SRCCODEPAGE @@ -1828,7 +1797,6 @@ SUBLANG subresource subsystemconsole subsystemwindows -suiteless swapchain swapchainpanel swappable @@ -1873,7 +1841,6 @@ tcommands Tdd TDelegated TDP -TEAMPROJECT tearoff Teb Techo @@ -1885,23 +1852,18 @@ terminalrenderdata TERMINALSCROLLING terminfo TEs -testbuildplatform testcon testd -testdlls testenv testlab testlist testmd -testmode testname -testnameprefix TESTNULL testpass testpasses testtestabc testtesttesttesttest -testtimeout TEXCOORD texel TExpected @@ -1929,7 +1891,6 @@ TJson TLambda TLDP TLEN -Tlgdata TMAE TMPF TMult @@ -1989,11 +1950,13 @@ UAC uap uapadmin UAX +UBool ucd uch udk UDM uer +UError uget uia UIACCESS @@ -2023,13 +1986,14 @@ unknwn UNORM unparseable unregistering -untests untextured untimes UPDATEDISPLAY UPDOWN UPKEY UPSS +uregex +URegular usebackq USECALLBACK USECOLOR @@ -2051,6 +2015,9 @@ USESIZE USESTDHANDLES usp USRDLL +utext +UText +UTEXT utr UVWX UVWXY @@ -2134,7 +2101,6 @@ WDDMCONSOLECONTEXT wdm webpage websites -websockets wekyb wex wextest @@ -2162,7 +2128,6 @@ windbg WINDEF windll WINDOWALPHA -Windowbuffer windowdpiapi WINDOWEDGE windowext @@ -2306,7 +2271,6 @@ xunit xutr XVIRTUALSCREEN XWalk -xxyyzz yact YCast YCENTER diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 760f8cf501f..c933de6950e 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -10,8 +10,19 @@ #include "textBuffer.hpp" #include "../../types/inc/GlyphWidth.hpp" +// It would be nice to add checked array access in the future, but it's a little annoying to do so without imparting +// performance (including Debug performance). Other languages are a little bit more ergonomic there than C++. +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).) +#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26472) // Don't use a static_cast for arithmetic conversions. Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1). + extern "C" int __isa_available; +constexpr auto clamp(auto value, auto lo, auto hi) +{ + return value < lo ? lo : (value > hi ? hi : value); +} + // The STL is missing a std::iota_n analogue for std::iota, so I made my own. template constexpr OutIt iota_n(OutIt dest, Diff count, T val) @@ -71,6 +82,82 @@ constexpr OutIt copy_n_small(InIt first, Diff count, OutIt dest) return dest; } +CharToColumnMapper::CharToColumnMapper(const wchar_t* chars, const uint16_t* charOffsets, ptrdiff_t lastCharOffset, til::CoordType currentColumn) noexcept : + _chars{ chars }, + _charOffsets{ charOffsets }, + _lastCharOffset{ lastCharOffset }, + _currentColumn{ currentColumn } +{ +} + +til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t offset) noexcept +{ + offset = offset < 0 ? 0 : (offset > _lastCharOffset ? _lastCharOffset : offset); + + auto col = _currentColumn; + const auto currentOffset = _charOffsets[col]; + + // Goal: Move the _currentColumn cursor to a cell which contains the given target offset. + // Depending on where the target offset is we have to either search forward or backward. + if (offset < currentOffset) + { + // Backward search. + // Goal: Find the first preceding column where the offset is <= the target offset. This results in the first + // cell that contains our target offset, even if that offset is in the middle of a long grapheme. + // + // We abuse the fact that the trailing half of wide glyphs is marked with CharOffsetsTrailer to our advantage. + // Since they're >0x8000, the `offset < _charOffsets[col]` check will always be true and ensure we iterate over them. + // + // Since _charOffsets cannot contain negative values and because offset has been + // clamped to be positive we naturally exit when reaching the first column. + for (; offset < _charOffsets[col - 1]; --col) + { + } + } + else if (offset > currentOffset) + { + // Forward search. + // Goal: Find the first subsequent column where the offset is > the target offset. + // We stop 1 column before that however so that the next loop works correctly. + // It's the inverse of the loop above. + // + // Since offset has been clamped to be at most 1 less than the maximum + // _charOffsets value the loop naturally exits before hitting the end. + for (; offset >= (_charOffsets[col + 1] & CharOffsetsMask); ++col) + { + } + // Now that we found the cell that definitely includes this char offset, + // we have to iterate back to the cell's starting column. + for (; WI_IsFlagSet(_charOffsets[col], CharOffsetsTrailer); --col) + { + } + } + + _currentColumn = col; + return col; +} + +til::CoordType CharToColumnMapper::GetTrailingColumnAt(ptrdiff_t offset) noexcept +{ + auto col = GetLeadingColumnAt(offset); + // This loop is a little redundant with the forward search loop in GetLeadingColumnAt() + // but it's realistically not worth caring about this. This code is not a bottleneck. + for (; WI_IsFlagSet(_charOffsets[col + 1], CharOffsetsTrailer); ++col) + { + } + return col; +} + +til::CoordType CharToColumnMapper::GetLeadingColumnAt(const wchar_t* str) noexcept +{ + return GetLeadingColumnAt(str - _chars); +} + +til::CoordType CharToColumnMapper::GetTrailingColumnAt(const wchar_t* str) noexcept +{ + return GetTrailingColumnAt(str - _chars); +} + // Routine Description: // - constructor // Arguments: @@ -118,10 +205,16 @@ LineRendition ROW::GetLineRendition() const noexcept return _lineRendition; } -uint16_t ROW::GetLineWidth() const noexcept +// Returns the index 1 past the last (technically) valid column in the row. +// The interplay between the old console and newer VT APIs which support line renditions is +// still unclear so it might be necessary to add two kinds of this function in the future. +// Console APIs treat the buffer as a large NxM matrix after all. +til::CoordType ROW::GetReadableColumnCount() const noexcept { - const auto scale = _lineRendition != LineRendition::SingleWidth ? 1 : 0; - return _columnCount >> scale; + const til::CoordType columnCount = _columnCount; + const til::CoordType scale = _lineRendition != LineRendition::SingleWidth; + const til::CoordType padding = _doubleBytePadded; + return (columnCount - padding) >> scale; } // Routine Description: @@ -287,26 +380,6 @@ til::CoordType ROW::NavigateToNext(til::CoordType column) const noexcept return _adjustForward(_clampedColumn(column + 1)); } -uint16_t ROW::_adjustBackward(uint16_t column) const noexcept -{ - // Safety: This is a little bit more dangerous. The first column is supposed - // to never be a trailer and so this loop should exit if column == 0. - for (; _uncheckedIsTrailer(column); --column) - { - } - return column; -} - -uint16_t ROW::_adjustForward(uint16_t column) const noexcept -{ - // Safety: This is a little bit more dangerous. The last column is supposed - // to never be a trailer and so this loop should exit if column == _columnCount. - for (; _uncheckedIsTrailer(column); ++column) - { - } - return column; -} - // Routine Description: // - clears char data in column in row // Arguments: @@ -841,12 +914,6 @@ uint16_t ROW::size() const noexcept return _columnCount; } -til::CoordType ROW::LineRenditionColumns() const noexcept -{ - const auto scale = _lineRendition != LineRendition::SingleWidth ? 1 : 0; - return _columnCount >> scale; -} - til::CoordType ROW::MeasureLeft() const noexcept { const auto text = GetText(); @@ -945,20 +1012,33 @@ DbcsAttribute ROW::DbcsAttrAt(til::CoordType column) const noexcept std::wstring_view ROW::GetText() const noexcept { - return { _chars.data(), _charSize() }; + const auto width = size_t{ til::at(_charOffsets, GetReadableColumnCount()) } & CharOffsetsMask; + return { _chars.data(), width }; } std::wstring_view ROW::GetText(til::CoordType columnBegin, til::CoordType columnEnd) const noexcept { const til::CoordType columns = _columnCount; - const auto colBeg = std::max(0, std::min(columns, columnBegin)); - const auto colEnd = std::max(colBeg, std::min(columns, columnEnd)); + const auto colBeg = clamp(columnBegin, 0, columns); + const auto colEnd = clamp(columnEnd, colBeg, columns); const size_t chBeg = _uncheckedCharOffset(gsl::narrow_cast(colBeg)); const size_t chEnd = _uncheckedCharOffset(gsl::narrow_cast(colEnd)); #pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). return { _chars.data() + chBeg, chEnd - chBeg }; } +til::CoordType ROW::GetLeftAlignedColumnAtChar(const ptrdiff_t offset) const noexcept +{ + auto mapper = _createCharToColumnMapper(offset); + return mapper.GetLeadingColumnAt(offset); +} + +til::CoordType ROW::GetRightAlignedColumnAtChar(const ptrdiff_t offset) const noexcept +{ + auto mapper = _createCharToColumnMapper(offset); + return mapper.GetTrailingColumnAt(offset); +} + DelimiterClass ROW::DelimiterClassAt(til::CoordType column, const std::wstring_view& wordDelimiters) const noexcept { const auto col = _clampedColumn(column); @@ -982,43 +1062,80 @@ DelimiterClass ROW::DelimiterClassAt(til::CoordType column, const std::wstring_v template constexpr uint16_t ROW::_clampedUint16(T v) noexcept { - return static_cast(std::max(T{ 0 }, std::min(T{ 65535 }, v))); + return static_cast(clamp(v, 0, 65535)); } template constexpr uint16_t ROW::_clampedColumn(T v) const noexcept { - return static_cast(std::max(T{ 0 }, std::min(_columnCount - 1u, v))); + return static_cast(clamp(v, 0, _columnCount - 1)); } template constexpr uint16_t ROW::_clampedColumnInclusive(T v) const noexcept { - return static_cast(std::max(T{ 0 }, std::min(_columnCount, v))); + return static_cast(clamp(v, 0, _columnCount)); } -// Safety: off must be [0, _charSize()]. -wchar_t ROW::_uncheckedChar(size_t off) const noexcept +uint16_t ROW::_charSize() const noexcept { - return til::at(_chars, off); + // Safety: _charOffsets is an array of `_columnCount + 1` entries. + return _charOffsets[_columnCount]; } -uint16_t ROW::_charSize() const noexcept +// Safety: off must be [0, _charSize()]. +template +wchar_t ROW::_uncheckedChar(T off) const noexcept { - // Safety: _charOffsets is an array of `_columnCount + 1` entries. - return til::at(_charOffsets, _columnCount); + return _chars[off]; } // Safety: col must be [0, _columnCount]. -uint16_t ROW::_uncheckedCharOffset(size_t col) const noexcept +template +uint16_t ROW::_uncheckedCharOffset(T col) const noexcept { assert(col < _charOffsets.size()); - return til::at(_charOffsets, col) & CharOffsetsMask; + return _charOffsets[col] & CharOffsetsMask; } // Safety: col must be [0, _columnCount]. -bool ROW::_uncheckedIsTrailer(size_t col) const noexcept +template +bool ROW::_uncheckedIsTrailer(T col) const noexcept { assert(col < _charOffsets.size()); - return WI_IsFlagSet(til::at(_charOffsets, col), CharOffsetsTrailer); + return WI_IsFlagSet(_charOffsets[col], CharOffsetsTrailer); +} + +template +T ROW::_adjustBackward(T column) const noexcept +{ + // Safety: This is a little bit more dangerous. The first column is supposed + // to never be a trailer and so this loop should exit if column == 0. + for (; _uncheckedIsTrailer(column); --column) + { + } + return column; +} + +template +T ROW::_adjustForward(T column) const noexcept +{ + // Safety: This is a little bit more dangerous. The last column is supposed + // to never be a trailer and so this loop should exit if column == _columnCount. + for (; _uncheckedIsTrailer(column); ++column) + { + } + return column; +} + +// Creates a CharToColumnMapper given an offset into _chars.data(). +// In other words, for a 120 column ROW with just ASCII text, the offset should be [0,120). +CharToColumnMapper ROW::_createCharToColumnMapper(ptrdiff_t offset) const noexcept +{ + const auto charsSize = _charSize(); + const auto lastChar = gsl::narrow_cast(charsSize - 1); + // We can sort of guess what column belongs to what offset because BMP glyphs are very common and + // UTF-16 stores them in 1 char. In other words, usually a ROW will have N chars for N columns. + const auto guessedColumn = gsl::narrow_cast(clamp(offset, 0, _columnCount)); + return CharToColumnMapper{ _chars.data(), _charOffsets.data(), lastChar, guessedColumn }; } diff --git a/src/buffer/out/Row.hpp b/src/buffer/out/Row.hpp index c19f343f235..379b1856bcc 100644 --- a/src/buffer/out/Row.hpp +++ b/src/buffer/out/Row.hpp @@ -65,6 +65,28 @@ struct RowCopyTextFromState til::CoordType sourceColumnEnd = 0; // OUT }; +// This structure is basically an inverse of ROW::_charOffsets. If you have a pointer +// into a ROW's text this class can tell you what cell that pointer belongs to. +struct CharToColumnMapper +{ + CharToColumnMapper(const wchar_t* chars, const uint16_t* charOffsets, ptrdiff_t lastCharOffset, til::CoordType currentColumn) noexcept; + + til::CoordType GetLeadingColumnAt(ptrdiff_t offset) noexcept; + til::CoordType GetTrailingColumnAt(ptrdiff_t offset) noexcept; + til::CoordType GetLeadingColumnAt(const wchar_t* str) noexcept; + til::CoordType GetTrailingColumnAt(const wchar_t* str) noexcept; + +private: + // See ROW and its members with identical name. + static constexpr uint16_t CharOffsetsTrailer = 0x8000; + static constexpr uint16_t CharOffsetsMask = 0x7fff; + + const wchar_t* _chars; + const uint16_t* _charOffsets; + ptrdiff_t _lastCharOffset; + til::CoordType _currentColumn; +}; + class ROW final { public: @@ -106,7 +128,7 @@ class ROW final bool WasDoubleBytePadded() const noexcept; void SetLineRendition(const LineRendition lineRendition) noexcept; LineRendition GetLineRendition() const noexcept; - uint16_t GetLineWidth() const noexcept; + til::CoordType GetReadableColumnCount() const noexcept; void Reset(const TextAttribute& attr) noexcept; void TransferAttributes(const til::small_rle& attr, til::CoordType newWidth); @@ -128,7 +150,6 @@ class ROW final TextAttribute GetAttrByColumn(til::CoordType column) const; std::vector GetHyperlinks() const; uint16_t size() const noexcept; - til::CoordType LineRenditionColumns() const noexcept; til::CoordType MeasureLeft() const noexcept; til::CoordType MeasureRight() const noexcept; bool ContainsText() const noexcept; @@ -136,6 +157,8 @@ class ROW final DbcsAttribute DbcsAttrAt(til::CoordType column) const noexcept; std::wstring_view GetText() const noexcept; std::wstring_view GetText(til::CoordType columnBegin, til::CoordType columnEnd) const noexcept; + til::CoordType GetLeftAlignedColumnAtChar(ptrdiff_t offset) const noexcept; + til::CoordType GetRightAlignedColumnAtChar(ptrdiff_t offset) const noexcept; DelimiterClass DelimiterClassAt(til::CoordType column, const std::wstring_view& wordDelimiters) const noexcept; auto AttrBegin() const noexcept { return _attr.begin(); } @@ -206,16 +229,21 @@ class ROW final template constexpr uint16_t _clampedColumnInclusive(T v) const noexcept; - uint16_t _adjustBackward(uint16_t column) const noexcept; - uint16_t _adjustForward(uint16_t column) const noexcept; - - wchar_t _uncheckedChar(size_t off) const noexcept; uint16_t _charSize() const noexcept; - uint16_t _uncheckedCharOffset(size_t col) const noexcept; - bool _uncheckedIsTrailer(size_t col) const noexcept; + template + wchar_t _uncheckedChar(T off) const noexcept; + template + uint16_t _uncheckedCharOffset(T col) const noexcept; + template + bool _uncheckedIsTrailer(T col) const noexcept; + template + T _adjustBackward(T column) const noexcept; + template + T _adjustForward(T column) const noexcept; void _init() noexcept; void _resizeChars(uint16_t colEndDirty, uint16_t chBegDirty, size_t chEndDirty, uint16_t chEndDirtyOld); + CharToColumnMapper _createCharToColumnMapper(ptrdiff_t offset) const noexcept; // These fields are a bit "wasteful", but it makes all this a bit more robust against // programming errors during initial development (which is when this comment was written). diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp new file mode 100644 index 00000000000..d23d883b336 --- /dev/null +++ b/src/buffer/out/UTextAdapter.cpp @@ -0,0 +1,262 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#include "precomp.h" +#include "UTextAdapter.h" + +#include "textBuffer.hpp" + +struct RowRange +{ + til::CoordType begin; + til::CoordType end; +}; + +constexpr size_t& accessLength(UText* ut) noexcept +{ + return *std::bit_cast(&ut->p); +} + +constexpr RowRange& accessRowRange(UText* ut) noexcept +{ + return *std::bit_cast(&ut->a); +} + +constexpr til::CoordType& accessCurrentRow(UText* ut) noexcept +{ + return ut->b; +} + +static UText* U_CALLCONV utextClone(UText* dest, const UText* src, UBool deep, UErrorCode* status) noexcept +{ + __assume(status != nullptr); + + if (deep) + { + *status = U_UNSUPPORTED_ERROR; + return dest; + } + + dest = utext_setup(dest, 0, status); + if (*status <= U_ZERO_ERROR) + { + memcpy(dest, src, sizeof(UText)); + } + + return dest; +} + +static int64_t U_CALLCONV utextLength(UText* ut) noexcept +try +{ + auto length = accessLength(ut); + + if (!length) + { + const auto& textBuffer = *static_cast(ut->context); + const auto range = accessRowRange(ut); + + for (til::CoordType y = range.begin; y < range.end; ++y) + { + length += textBuffer.GetRowByOffset(y).GetText().size(); + } + + accessLength(ut) = length; + } + + return gsl::narrow_cast(length); +} +catch (...) +{ + return 0; +} + +static UBool U_CALLCONV utextAccess(UText* ut, int64_t nativeIndex, UBool forward) noexcept +try +{ + if (!forward) + { + // Even after reading the ICU documentation I'm a little unclear on how to handle the forward flag. + // I _think_ it's basically nothing but "nativeIndex--" for us, but I didn't want to verify it + // because right now we never use any ICU functions that require backwards text access anyways. + return false; + } + + const auto& textBuffer = *static_cast(ut->context); + const auto range = accessRowRange(ut); + auto start = ut->chunkNativeStart; + auto limit = ut->chunkNativeLimit; + auto y = accessCurrentRow(ut); + std::wstring_view text; + + if (nativeIndex >= start && nativeIndex < limit) + { + return true; + } + + if (nativeIndex < start) + { + for (;;) + { + --y; + if (y < range.begin) + { + return false; + } + + text = textBuffer.GetRowByOffset(y).GetText(); + limit = start; + start -= text.size(); + + if (nativeIndex >= start) + { + break; + } + } + } + else + { + for (;;) + { + ++y; + if (y >= range.end) + { + return false; + } + + text = textBuffer.GetRowByOffset(y).GetText(); + start = limit; + limit += text.size(); + + if (nativeIndex < limit) + { + break; + } + } + } + + accessCurrentRow(ut) = y; + ut->chunkNativeStart = start; + ut->chunkNativeLimit = limit; + ut->chunkOffset = gsl::narrow_cast(nativeIndex - start); + ut->chunkLength = gsl::narrow_cast(text.size()); +#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). + ut->chunkContents = reinterpret_cast(text.data()); + ut->nativeIndexingLimit = ut->chunkLength; + return true; +} +catch (...) +{ + return false; +} + +static int32_t U_CALLCONV utextExtract(UText* ut, int64_t nativeStart, int64_t nativeLimit, char16_t* dest, int32_t destCapacity, UErrorCode* status) noexcept +try +{ + __assume(status != nullptr); + + if (*status > U_ZERO_ERROR) + { + return 0; + } + if (destCapacity < 0 || (dest == nullptr && destCapacity > 0) || nativeStart > nativeLimit) + { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if (!utextAccess(ut, nativeStart, true)) + { + return 0; + } + + nativeLimit = std::min(ut->chunkNativeLimit, nativeLimit); + + if (destCapacity <= 0) + { + return gsl::narrow_cast(nativeLimit - nativeStart); + } + + const auto& textBuffer = *static_cast(ut->context); + const auto y = accessCurrentRow(ut); + const auto offset = ut->chunkNativeStart - nativeStart; + const auto text = textBuffer.GetRowByOffset(y).GetText().substr(offset); + const auto length = std::min(gsl::narrow_cast(destCapacity), text.size()); + + memcpy(dest, text.data(), length * sizeof(char16_t)); + + if (length < destCapacity) + { +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). + dest[length] = 0; + } + + return gsl::narrow_cast(length); +} +catch (...) +{ + // The only thing that can fail is GetRowByOffset() which in turn can only fail when VirtualAlloc() fails. + *status = U_MEMORY_ALLOCATION_ERROR; + return 0; +} + +static constexpr UTextFuncs utextFuncs{ + .tableSize = sizeof(UTextFuncs), + .clone = utextClone, + .nativeLength = utextLength, + .access = utextAccess, + .extract = utextExtract, +}; + +UText* UTextFromTextBuffer(UText* ut, const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept +{ + __assume(status != nullptr); + + ut = utext_setup(ut, 0, status); + if (*status > U_ZERO_ERROR) + { + return nullptr; + } + + ut->providerProperties = (1 << UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE) | (1 << UTEXT_PROVIDER_STABLE_CHUNKS); + ut->pFuncs = &utextFuncs; + ut->context = &textBuffer; + accessCurrentRow(ut) = rowBeg - 1; // the utextAccess() below will advance this by 1. + accessRowRange(ut) = { rowBeg, rowEnd }; + + utextAccess(ut, 0, true); + return ut; +} + +til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t nativeIndexEnd) +{ + // The parameters are given as a half-open [beg,end) range, but the point_span we return in closed [beg,end]. + nativeIndexEnd--; + + const auto& textBuffer = *static_cast(ut->context); + til::point_span ret; + + if (utextAccess(ut, nativeIndexBeg, true)) + { + const auto y = accessCurrentRow(ut); + ret.start.x = textBuffer.GetRowByOffset(y).GetLeftAlignedColumnAtChar(nativeIndexBeg - ut->chunkNativeStart); + ret.start.y = y; + } + else + { + ret.start.y = accessRowRange(ut).begin; + } + + if (utextAccess(ut, nativeIndexEnd, true)) + { + const auto y = accessCurrentRow(ut); + ret.end.x = textBuffer.GetRowByOffset(y).GetLeftAlignedColumnAtChar(nativeIndexEnd - ut->chunkNativeStart); + ret.end.y = y; + } + else + { + ret.end = ret.start; + } + + return ret; +} diff --git a/src/buffer/out/UTextAdapter.h b/src/buffer/out/UTextAdapter.h new file mode 100644 index 00000000000..06d413902a4 --- /dev/null +++ b/src/buffer/out/UTextAdapter.h @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +// Can't forward declare the UErrorCode enum. Thanks C++. +#include + +class TextBuffer; +struct UText; + +UText* UTextFromTextBuffer(UText* ut, const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept; +til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t nativeIndexEnd); diff --git a/src/buffer/out/lib/bufferout.vcxproj b/src/buffer/out/lib/bufferout.vcxproj index f7962921c2d..20385eff096 100644 --- a/src/buffer/out/lib/bufferout.vcxproj +++ b/src/buffer/out/lib/bufferout.vcxproj @@ -26,6 +26,7 @@ Create + @@ -44,6 +45,7 @@ + diff --git a/src/buffer/out/search.cpp b/src/buffer/out/search.cpp index d060f55b7de..1735872ae10 100644 --- a/src/buffer/out/search.cpp +++ b/src/buffer/out/search.cpp @@ -2,11 +2,8 @@ // Licensed under the MIT license. #include "precomp.h" - #include "search.h" -#include - #include "textBuffer.hpp" #include "../types/inc/GlyphWidth.hpp" @@ -20,324 +17,83 @@ using namespace Microsoft::Console::Types; // - textBuffer - The screen text buffer to search through (the "haystack") // - renderData - The IRenderData type reference, it is for providing selection methods // - str - The search term you want to find (the "needle") -// - direction - The direction to search (upward or downward) -// - sensitivity - Whether or not you care about case -Search::Search(Microsoft::Console::Render::IRenderData& renderData, - const std::wstring_view str, - const Direction direction, - const Sensitivity sensitivity) : - _direction(direction), - _sensitivity(sensitivity), - _needle(s_CreateNeedleFromString(str)), - _renderData(renderData), - _coordAnchor(s_GetInitialAnchor(renderData, direction)) +// - reverse - True when searching backward/upwards in the buffer +// - caseInsensitive - As the name indicates: case insensitivity +Search::Search(Microsoft::Console::Render::IRenderData& renderData, const std::wstring_view str, bool reverse, bool caseInsensitive) : + _renderData{ &renderData }, + _step{ reverse ? -1 : 1 } { - _coordNext = _coordAnchor; -} + const auto& textBuffer = _renderData->GetTextBuffer(); -// Routine Description: -// - Constructs a Search object. -// - Make a Search object then call .FindNext() to locate items. -// - Once you've found something, you can perform actions like .Select() or .Color() -// Arguments: -// - textBuffer - The screen text buffer to search through (the "haystack") -// - renderData - The IRenderData type reference, it is for providing selection methods -// - str - The search term you want to find (the "needle") -// - direction - The direction to search (upward or downward) -// - sensitivity - Whether or not you care about case -// - anchor - starting search location in screenInfo -Search::Search(Microsoft::Console::Render::IRenderData& renderData, - const std::wstring_view str, - const Direction direction, - const Sensitivity sensitivity, - const til::point anchor) : - _direction(direction), - _sensitivity(sensitivity), - _needle(s_CreateNeedleFromString(str)), - _coordAnchor(anchor), - _renderData(renderData) -{ - _coordNext = _coordAnchor; -} + _results = textBuffer.SearchText(str, caseInsensitive); + _mutationCount = textBuffer.GetMutationCount(); -// Routine Description -// - Locates the next instance of the search term within the screen buffer. -// Arguments: -// - - Uses internal state from constructor -// Return Value: -// - True if we found another item. False if we've reached the end of the buffer. -// - NOTE: You can FindNext() again after False to go around the buffer again. -bool Search::FindNext() -{ - if (_reachedEnd) + if (_results.empty()) { - _reachedEnd = false; - return false; + return; } - do - { - if (_FindNeedleInHaystackAt(_coordNext, _coordSelStart, _coordSelEnd)) - { - _UpdateNextPosition(); - _reachedEnd = _coordNext == _coordAnchor; - return true; - } - else - { - _UpdateNextPosition(); - } - - } while (_coordNext != _coordAnchor); + const auto highestIndex = gsl::narrow_cast(_results.size()) - 1; + auto firstIndex = reverse ? highestIndex : 0; - return false; -} - -// Routine Description: -// - Takes the found word and selects it in the screen buffer -void Search::Select() const -{ - // Convert buffer selection offsets into the equivalent screen coordinates - // required by SelectNewRegion, taking line renditions into account. - const auto& textBuffer = _renderData.GetTextBuffer(); - const auto selStart = textBuffer.BufferToScreenPosition(_coordSelStart); - const auto selEnd = textBuffer.BufferToScreenPosition(_coordSelEnd); - _renderData.SelectNewRegion(selStart, selEnd); -} - -// Routine Description: -// - Applies the supplied TextAttribute to the current search result. -// Arguments: -// - attr - The attribute to apply to the result -void Search::Color(const TextAttribute attr) const -{ - // Only select if we've found something. - if (_coordSelEnd >= _coordSelStart) + if (_renderData->IsSelectionActive()) { - _renderData.ColorSelection(_coordSelStart, _coordSelEnd, attr); - } -} - -// Routine Description: -// - gets start and end position of text sound by search. only guaranteed to have valid data if FindNext has -// been called and returned true. -// Return Value: -// - pair containing [start, end] coord positions of text found by search -std::pair Search::GetFoundLocation() const noexcept -{ - return { _coordSelStart, _coordSelEnd }; -} - -// Routine Description: -// - Finds the anchor position where we will start searches from. -// - This position will represent the "wrap around" point in the buffer or where -// we reach the end of our search. -// - If the screen buffer given already has a selection in it, it will be used to determine the anchor. -// - Otherwise, we will choose one of the ends of the screen buffer depending on direction. -// Arguments: -// - renderData - The reference to the IRenderData interface type object -// - direction - The intended direction of the search -// Return Value: -// - Coordinate to start the search from. -til::point Search::s_GetInitialAnchor(const Microsoft::Console::Render::IRenderData& renderData, const Direction direction) -{ - const auto& textBuffer = renderData.GetTextBuffer(); - const auto textBufferEndPosition = renderData.GetTextBufferEndPosition(); - if (renderData.IsSelectionActive()) - { - // Convert the screen position of the selection anchor into an equivalent - // buffer position to start searching, taking line rendition into account. - auto anchor = textBuffer.ScreenToBufferPosition(renderData.GetSelectionAnchor()); - - if (direction == Direction::Forward) - { - textBuffer.GetSize().IncrementInBoundsCircular(anchor); - } - else - { - textBuffer.GetSize().DecrementInBoundsCircular(anchor); - // If the selection starts at (0, 0), we need to make sure - // it does not exceed the text buffer end position - anchor.x = std::min(textBufferEndPosition.x, anchor.x); - anchor.y = std::min(textBufferEndPosition.y, anchor.y); - } - return anchor; - } - else - { - if (direction == Direction::Forward) + const auto anchor = textBuffer.ScreenToBufferPosition(_renderData->GetSelectionAnchor()); + if (reverse) { - return { 0, 0 }; + for (; firstIndex >= 0 && til::at(_results, firstIndex).start >= anchor; --firstIndex) + { + } } else { - return textBufferEndPosition; + for (; firstIndex <= highestIndex && til::at(_results, firstIndex).start <= anchor; ++firstIndex) + { + } } } -} -// Routine Description: -// - Attempts to compare the search term (the needle) to the screen buffer (the haystack) -// at the given coordinate position of the screen buffer. -// - Performs one comparison. Call again with new positions to check other spots. -// Arguments: -// - pos - The position in the haystack (screen buffer) to compare -// - start - If we found it, this is filled with the coordinate of the first character of the needle. -// - end - If we found it, this is filled with the coordinate of the last character of the needle. -// Return Value: -// - True if we found it. False if not. -bool Search::_FindNeedleInHaystackAt(const til::point pos, til::point& start, til::point& end) const -{ - start = {}; - end = {}; - - auto bufferPos = pos; - - for (const auto& needleChars : _needle) - { - // Haystack is the buffer. Needle is the string we were given. - const auto hayIter = _renderData.GetTextBuffer().GetTextDataAt(bufferPos); - const auto hayChars = *hayIter; - - // If we didn't match at any point of the needle, return false. - if (!_CompareChars(hayChars, needleChars)) - { - return false; - } - - _IncrementCoord(bufferPos); - } - - _DecrementCoord(bufferPos); - - // If we made it the whole way through the needle, then it was in the haystack. - // Fill out the span that we found the result at and return true. - start = pos; - end = bufferPos; - - return true; + // We reverse the _index by 1 so that the first call to FindNext() moves it to the firstIndex we found. + _index = firstIndex - _step; } -// Routine Description: -// - Provides an abstraction for comparing two spans of text. -// - Internally handles case sensitivity based on object construction. -// Arguments: -// - one - String view representing the first string of text -// - two - String view representing the second string of text -// Return Value: -// - True if they are the same. False otherwise. -bool Search::_CompareChars(const std::wstring_view one, const std::wstring_view two) const noexcept +bool Search::IsStale() const noexcept { - if (one.size() != two.size()) - { - return false; - } - - for (size_t i = 0; i < one.size(); i++) - { - if (_ApplySensitivity(one.at(i)) != _ApplySensitivity(two.at(i))) - { - return false; - } - } - - return true; + return !_renderData || _renderData->GetTextBuffer().GetMutationCount() != _mutationCount; } // Routine Description: -// - Provides an abstraction for conditionally applying case sensitivity -// based on object construction -// Arguments: -// - wch - Character to adjust if necessary -// Return Value: -// - Adjusted value (or not). -wchar_t Search::_ApplySensitivity(const wchar_t wch) const noexcept +// - Takes the found word and selects it in the screen buffer +bool Search::SelectNext() { - if (_sensitivity == Sensitivity::CaseInsensitive) + if (_results.empty()) { - return ::towlower(wch); - } - else - { - return wch; + return false; } -} -// Routine Description: -// - Helper to increment a coordinate in respect to the associated screen buffer -// Arguments -// - coord - Updated by function to increment one position (will wrap X and Y direction) -void Search::_IncrementCoord(til::point& coord) const noexcept -{ - _renderData.GetTextBuffer().GetSize().IncrementInBoundsCircular(coord); -} + const auto count = gsl::narrow_cast(_results.size()); + const auto& textBuffer = _renderData->GetTextBuffer(); -// Routine Description: -// - Helper to decrement a coordinate in respect to the associated screen buffer -// Arguments -// - coord - Updated by function to decrement one position (will wrap X and Y direction) -void Search::_DecrementCoord(til::point& coord) const noexcept -{ - _renderData.GetTextBuffer().GetSize().DecrementInBoundsCircular(coord); -} + _index = (_index + _step + count) % count; -// Routine Description: -// - Helper to update the coordinate position to the next point to be searched -// Return Value: -// - True if we haven't reached the end of the buffer. False otherwise. -void Search::_UpdateNextPosition() -{ - if (_direction == Direction::Forward) - { - _IncrementCoord(_coordNext); - } - else if (_direction == Direction::Backward) - { - _DecrementCoord(_coordNext); - } - else - { - THROW_HR(E_NOTIMPL); - } - - // To reduce wrap-around time, if the next position is larger than - // the end position of the written text - // We put the next position to: - // Forward: (0, 0) - // Backward: the position of the end of the text buffer - const auto bufferEndPosition = _renderData.GetTextBufferEndPosition(); + const auto& s = til::at(_results, _index); + // Convert buffer selection offsets into the equivalent screen coordinates + // required by SelectNewRegion, taking line renditions into account. + const auto selStart = textBuffer.BufferToScreenPosition(s.start); + const auto selEnd = textBuffer.BufferToScreenPosition(s.end); - if (_coordNext.y > bufferEndPosition.y || - (_coordNext.y == bufferEndPosition.y && _coordNext.x > bufferEndPosition.x)) - { - if (_direction == Direction::Forward) - { - _coordNext = {}; - } - else - { - _coordNext = bufferEndPosition; - } - } + _renderData->SelectNewRegion(selStart, selEnd); + return true; } // Routine Description: -// - Creates a "needle" of the correct format for comparison to the screen buffer text data -// that we can use for our search +// - Applies the supplied TextAttribute to all search results. // Arguments: -// - wstr - String that will be our search term -// Return Value: -// - Structured text data for comparison to screen buffer text data. -std::vector Search::s_CreateNeedleFromString(const std::wstring_view wstr) +// - attr - The attribute to apply to the result +void Search::ColorAll(const TextAttribute& attr) const { - std::vector cells; - for (const auto& chars : til::utf16_iterator{ wstr }) + for (const auto& s : _results) { - if (IsGlyphFullWidth(chars)) - { - cells.emplace_back(chars); - } - cells.emplace_back(chars); + _renderData->ColorSelection(s.start, s.end, attr); } - return cells; } diff --git a/src/buffer/out/search.h b/src/buffer/out/search.h index 68f9f82bfd9..b8361188b16 100644 --- a/src/buffer/out/search.h +++ b/src/buffer/out/search.h @@ -21,64 +21,24 @@ Revision History: #include "textBuffer.hpp" #include "../renderer/inc/IRenderData.hpp" -// This used to be in find.h. -#define SEARCH_STRING_LENGTH (80) - class Search final { public: - enum class Direction - { - Forward, - Backward - }; - - enum class Sensitivity - { - CaseInsensitive, - CaseSensitive - }; - - Search(Microsoft::Console::Render::IRenderData& renderData, - const std::wstring_view str, - const Direction dir, - const Sensitivity sensitivity); + Search() = default; + Search(Microsoft::Console::Render::IRenderData& renderData, std::wstring_view str, bool reverse, bool caseInsensitive); - Search(Microsoft::Console::Render::IRenderData& renderData, - const std::wstring_view str, - const Direction dir, - const Sensitivity sensitivity, - const til::point anchor); + bool IsStale() const noexcept; + bool SelectNext(); - bool FindNext(); - void Select() const; - void Color(const TextAttribute attr) const; - - std::pair GetFoundLocation() const noexcept; + void ColorAll(const TextAttribute& attr) const; private: - wchar_t _ApplySensitivity(const wchar_t wch) const noexcept; - bool _FindNeedleInHaystackAt(const til::point pos, til::point& start, til::point& end) const; - bool _CompareChars(const std::wstring_view one, const std::wstring_view two) const noexcept; - void _UpdateNextPosition(); - - void _IncrementCoord(til::point& coord) const noexcept; - void _DecrementCoord(til::point& coord) const noexcept; - - static til::point s_GetInitialAnchor(const Microsoft::Console::Render::IRenderData& renderData, const Direction dir); - - static std::vector s_CreateNeedleFromString(const std::wstring_view wstr); - - bool _reachedEnd = false; - til::point _coordNext; - til::point _coordSelStart; - til::point _coordSelEnd; - - const til::point _coordAnchor; - const std::vector _needle; - const Direction _direction; - const Sensitivity _sensitivity; - Microsoft::Console::Render::IRenderData& _renderData; + // _renderData is a pointer so that Search() is constexpr default constructable. + Microsoft::Console::Render::IRenderData* _renderData = nullptr; + std::vector _results; + ptrdiff_t _index = 0; + ptrdiff_t _step = 0; + uint64_t _mutationCount = 0; #ifdef UNIT_TESTING friend class SearchTests; diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index d120b2e4a98..5219a1a2f51 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -8,15 +8,42 @@ #include #include +#include "UTextAdapter.h" +#include "../../types/inc/GlyphWidth.hpp" #include "../renderer/base/renderer.hpp" -#include "../types/inc/utils.hpp" #include "../types/inc/convert.hpp" -#include "../../types/inc/GlyphWidth.hpp" +#include "../types/inc/utils.hpp" using namespace Microsoft::Console; using namespace Microsoft::Console::Types; using PointTree = interval_tree::IntervalTree; +using unique_URegularExpression = wistd::unique_ptr>; + +constexpr bool allWhitespace(const std::wstring_view& text) noexcept +{ + for (const auto ch : text) + { + if (ch != L' ') + { + return false; + } + } + return true; +} + +static URegularExpression* createURegularExpression(const std::wstring_view& pattern, uint32_t flags, UErrorCode* error) noexcept +{ +#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). + const auto re = uregex_open(reinterpret_cast(pattern.data()), gsl::narrow_cast(pattern.size()), flags, nullptr, error); + // ICU describes the time unit as being dependent on CPU performance and "typically [in] the order of milliseconds", + // but this claim seems highly outdated already. On my CPU from 2021, a limit of 4096 equals roughly 600ms. + uregex_setTimeLimit(re, 4096, error); + uregex_setStackLimit(re, 4 * 1024 * 1024, error); + return re; +} + +static std::atomic s_mutationCountInitialValue; // Routine Description: // - Creates a new instance of TextBuffer @@ -36,6 +63,9 @@ TextBuffer::TextBuffer(til::size screenBufferSize, Microsoft::Console::Render::Renderer& renderer) : _renderer{ renderer }, _currentAttributes{ defaultAttributes }, + // This way every TextBuffer will start with a ""unique"" _mutationCount + // and so it'll compare unequal with the counter of other TextBuffers. + _mutationCount{ s_mutationCountInitialValue.fetch_add(0x100000000) }, _cursor{ cursorSize, *this }, _isActiveBuffer{ isActiveBuffer } { @@ -51,6 +81,10 @@ TextBuffer::~TextBuffer() { _destroy(); } + if (_urlRegex) + { + uregex_close(_urlRegex); + } } // I put these functions in a block at the start of the class, because they're the most @@ -166,6 +200,23 @@ ROW& TextBuffer::_getRowByOffsetDirect(size_t offset) return *reinterpret_cast(row); } +ROW& TextBuffer::_getRow(til::CoordType y) const +{ + // Rows are stored circularly, so the index you ask for is offset by the start position and mod the total of rows. + auto offset = (_firstRow + y) % _height; + + // Support negative wrap around. This way an index of -1 will + // wrap to _rowCount-1 and make implementing scrolling easier. + if (offset < 0) + { + offset += _height; + } + + // We add 1 to the row offset, because row "0" is the one returned by GetScratchpadRow(). +#pragma warning(suppress : 26492) // Don't use const_cast to cast away const or volatile (type.3). + return const_cast(this)->_getRowByOffsetDirect(gsl::narrow_cast(offset) + 1); +} + // Returns the "user-visible" index of the last committed row, which can be used // to short-circuit some algorithms that try to scan the entire buffer. // Returns 0 if no rows are committed in. @@ -183,27 +234,15 @@ til::CoordType TextBuffer::_estimateOffsetOfLastCommittedRow() const noexcept // (what corresponds to the top row of the screen buffer). const ROW& TextBuffer::GetRowByOffset(const til::CoordType index) const { - // The const_cast is safe because "const" never had any meaning in C++ in the first place. -#pragma warning(suppress : 26492) // Don't use const_cast to cast away const or volatile (type.3). - return const_cast(this)->GetRowByOffset(index); + return _getRow(index); } // Retrieves a row from the buffer by its offset from the first row of the text buffer // (what corresponds to the top row of the screen buffer). -ROW& TextBuffer::GetRowByOffset(const til::CoordType index) +ROW& TextBuffer::GetMutableRowByOffset(const til::CoordType index) { - // Rows are stored circularly, so the index you ask for is offset by the start position and mod the total of rows. - auto offset = (_firstRow + index) % _height; - - // Support negative wrap around. This way an index of -1 will - // wrap to _rowCount-1 and make implementing scrolling easier. - if (offset < 0) - { - offset += _height; - } - - // We add 1 to the row offset, because row "0" is the one returned by GetScratchpadRow(). - return _getRowByOffsetDirect(gsl::narrow_cast(offset) + 1); + _mutationCount++; + return _getRow(index); } // Returns a row filled with whitespace and the current attributes, for you to freely use. @@ -345,91 +384,6 @@ TextBufferCellIterator TextBuffer::GetCellDataAt(const til::point at, const View return TextBufferCellIterator(*this, at, limit); } -//Routine Description: -// - Corrects and enforces consistent double byte character state (KAttrs line) within a row of the text buffer. -// - This will take the given double byte information and check that it will be consistent when inserted into the buffer -// at the current cursor position. -// - It will correct the buffer (by erasing the character prior to the cursor) if necessary to make a consistent state. -//Arguments: -// - dbcsAttribute - Double byte information associated with the character about to be inserted into the buffer -//Return Value: -// - True if it is valid to insert a character with the given double byte attributes. False otherwise. -bool TextBuffer::_AssertValidDoubleByteSequence(const DbcsAttribute dbcsAttribute) -{ - // To figure out if the sequence is valid, we have to look at the character that comes before the current one - const auto coordPrevPosition = _GetPreviousFromCursor(); - auto& prevRow = GetRowByOffset(coordPrevPosition.y); - DbcsAttribute prevDbcsAttr = DbcsAttribute::Single; - try - { - prevDbcsAttr = prevRow.DbcsAttrAt(coordPrevPosition.x); - } - catch (...) - { - LOG_HR(wil::ResultFromCaughtException()); - return false; - } - - auto fValidSequence = true; // Valid until proven otherwise - auto fCorrectableByErase = false; // Can't be corrected until proven otherwise - - // Here's the matrix of valid items: - // N = None (single byte) - // L = Lead (leading byte of double byte sequence - // T = Trail (trailing byte of double byte sequence - // Prev Curr Result - // N N OK. - // N L OK. - // N T Fail, uncorrectable. Trailing byte must have had leading before it. - // L N Fail, OK with erase. Lead needs trailing pair. Can erase lead to correct. - // L L Fail, OK with erase. Lead needs trailing pair. Can erase prev lead to correct. - // L T OK. - // T N OK. - // T L OK. - // T T Fail, uncorrectable. New trailing byte must have had leading before it. - - // Check for only failing portions of the matrix: - if (prevDbcsAttr == DbcsAttribute::Single && dbcsAttribute == DbcsAttribute::Trailing) - { - // N, T failing case (uncorrectable) - fValidSequence = false; - } - else if (prevDbcsAttr == DbcsAttribute::Leading) - { - if (dbcsAttribute == DbcsAttribute::Single || dbcsAttribute == DbcsAttribute::Leading) - { - // L, N and L, L failing cases (correctable) - fValidSequence = false; - fCorrectableByErase = true; - } - } - else if (prevDbcsAttr == DbcsAttribute::Trailing && dbcsAttribute == DbcsAttribute::Trailing) - { - // T, T failing case (uncorrectable) - fValidSequence = false; - } - - // If it's correctable by erase, erase the previous character - if (fCorrectableByErase) - { - // Erase previous character into an N type. - try - { - prevRow.ClearCell(coordPrevPosition.x); - } - catch (...) - { - LOG_HR(wil::ResultFromCaughtException()); - return false; - } - - // Sequence is now N N or N L, which are both okay. Set sequence back to valid. - fValidSequence = true; - } - - return fValidSequence; -} - //Routine Description: // - Call before inserting a character into the buffer. // - This will ensure a consistent double byte state (KAttrs line) within the text buffer @@ -453,7 +407,7 @@ void TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute if (cursorPosition.x == lineWidth - 1) { // set that we're wrapping for double byte reasons - auto& row = GetRowByOffset(cursorPosition.y); + auto& row = GetMutableRowByOffset(cursorPosition.y); row.SetDoubleBytePadded(true); // then move the cursor forward and onto the next row @@ -482,7 +436,7 @@ size_t TextBuffer::GraphemePrev(const std::wstring_view& chars, size_t position) // You can continue calling the function on the same row as long as state.columnEnd < state.columnLimit. void TextBuffer::Write(til::CoordType row, const TextAttribute& attributes, RowWriteState& state) { - auto& r = GetRowByOffset(row); + auto& r = GetMutableRowByOffset(row); r.ReplaceText(state); r.ReplaceAttributes(state.columnBegin, state.columnEnd, attributes); TriggerRedraw(Viewport::FromExclusive({ state.columnBeginDirty, row, state.columnEndDirty, row + 1 })); @@ -535,7 +489,7 @@ void TextBuffer::FillRect(const til::rect& rect, const std::wstring_view& fill, for (auto y = rect.top; y < rect.bottom; ++y) { - auto& r = GetRowByOffset(y); + auto& r = GetMutableRowByOffset(y); r.CopyTextFrom(state); r.ReplaceAttributes(rect.left, rect.right, attributes); TriggerRedraw(Viewport::FromExclusive({ state.columnBeginDirty, y, state.columnEndDirty, y + 1 })); @@ -616,7 +570,7 @@ OutputCellIterator TextBuffer::WriteLine(const OutputCellIterator givenIt, } // Get the row and write the cells - auto& row = GetRowByOffset(target.y); + auto& row = GetMutableRowByOffset(target.y); const auto newIt = row.WriteCells(givenIt, target.x, wrap, limitRight); // Take the cell distance written and notify that it needs to be repainted. @@ -648,7 +602,7 @@ void TextBuffer::InsertCharacter(const std::wstring_view chars, const auto iCol = GetCursor().GetPosition().x; // column logical and array positions are equal. // Get the row associated with the given logical position - auto& Row = GetRowByOffset(iRow); + auto& Row = GetMutableRowByOffset(iRow); // Store character and double byte data switch (dbcsAttribute) @@ -708,7 +662,7 @@ void TextBuffer::_AdjustWrapOnCurrentRow(const bool fSet) const auto uiCurrentRowOffset = GetCursor().GetPosition().y; // Set the wrap status as appropriate - GetRowByOffset(uiCurrentRowOffset).SetWrapForced(fSet); + GetMutableRowByOffset(uiCurrentRowOffset).SetWrapForced(fSet); } //Routine Description: @@ -784,7 +738,7 @@ void TextBuffer::IncrementCircularBuffer(const TextAttribute& fillAttributes) _PruneHyperlinks(); // Second, clean out the old "first row" as it will become the "last row" of the buffer after the circle is performed. - GetRowByOffset(0).Reset(fillAttributes); + GetMutableRowByOffset(0).Reset(fillAttributes); { // Now proceed to increment. // Incrementing it will cause the next line down to become the new "top" of the window (the new "0" in logical coordinates) @@ -955,7 +909,7 @@ void TextBuffer::ScrollRows(const til::CoordType firstRow, til::CoordType size, for (; y != end; y += step) { - GetRowByOffset(y + delta).CopyFrom(GetRowByOffset(y)); + GetMutableRowByOffset(y + delta).CopyFrom(GetRowByOffset(y)); } } @@ -969,6 +923,11 @@ const Cursor& TextBuffer::GetCursor() const noexcept return _cursor; } +uint64_t TextBuffer::GetMutationCount() const noexcept +{ + return _mutationCount; +} + const TextAttribute& TextBuffer::GetCurrentAttributes() const noexcept { return _currentAttributes; @@ -981,14 +940,14 @@ void TextBuffer::SetCurrentAttributes(const TextAttribute& currentAttributes) no void TextBuffer::SetWrapForced(const til::CoordType y, bool wrap) { - GetRowByOffset(y).SetWrapForced(wrap); + GetMutableRowByOffset(y).SetWrapForced(wrap); } void TextBuffer::SetCurrentLineRendition(const LineRendition lineRendition, const TextAttribute& fillAttributes) { const auto cursorPosition = GetCursor().GetPosition(); const auto rowIndex = cursorPosition.y; - auto& row = GetRowByOffset(rowIndex); + auto& row = GetMutableRowByOffset(rowIndex); if (row.GetLineRendition() != lineRendition) { row.SetLineRendition(lineRendition); @@ -1013,7 +972,7 @@ void TextBuffer::ResetLineRenditionRange(const til::CoordType startRow, const ti { for (auto row = startRow; row < endRow; row++) { - GetRowByOffset(row).SetLineRendition(LineRendition::SingleWidth); + GetMutableRowByOffset(row).SetLineRendition(LineRendition::SingleWidth); } } @@ -1090,7 +1049,7 @@ void TextBuffer::Reset() noexcept for (; dstRow < copyableRows; ++dstRow, ++srcRow) { - newBuffer.GetRowByOffset(dstRow).CopyFrom(GetRowByOffset(srcRow)); + newBuffer.GetMutableRowByOffset(dstRow).CopyFrom(GetRowByOffset(srcRow)); } // NOTE: Keep this in sync with _reserve(). @@ -2446,7 +2405,7 @@ try const auto newBufferPos = newCursor.GetPosition(); if (newBufferPos.x == 0) { - auto& newRow = newBuffer.GetRowByOffset(newBufferPos.y); + auto& newRow = newBuffer.GetMutableRowByOffset(newBufferPos.y); newRow.SetLineRendition(row.GetLineRendition()); } @@ -2516,7 +2475,7 @@ try // copy attributes from the old row till the end of the new row, and // move on. const auto newRowY = newCursor.GetPosition().y; - auto& newRow = newBuffer.GetRowByOffset(newRowY); + auto& newRow = newBuffer.GetMutableRowByOffset(newRowY); auto newAttrColumn = newCursor.GetPosition().x; const auto newWidth = newBuffer.GetLineWidth(newRowY); // Stop when we get to the end of the buffer width, or the new position @@ -2631,7 +2590,7 @@ try // into the new one, and resize the row to match. We'll rely on the // behavior of ATTR_ROW::Resize to trim down when narrower, or extend // the last attr when wider. - auto& newRow = newBuffer.GetRowByOffset(newRowY); + auto& newRow = newBuffer.GetMutableRowByOffset(newRowY); const auto newWidth = newBuffer.GetLineWidth(newRowY); newRow.TransferAttributes(row.Attributes(), newWidth); @@ -2641,7 +2600,6 @@ try // Finish copying remaining parameters from the old text buffer to the new one newBuffer.CopyProperties(oldBuffer); newBuffer.CopyHyperlinkMaps(oldBuffer); - newBuffer.CopyPatterns(oldBuffer); // If we found where to put the cursor while placing characters into the buffer, // just put the cursor there. Otherwise we have to advance manually. @@ -2804,103 +2762,87 @@ void TextBuffer::CopyHyperlinkMaps(const TextBuffer& other) } // Method Description: -// - Adds a regex pattern we should search for -// - The searching does not happen here, we only search when asked to by TerminalCore +// - Finds patterns within the requested region of the text buffer // Arguments: -// - The regex pattern +// - The firstRow to start searching from +// - The lastRow to search // Return value: -// - An ID that the caller should associate with the given pattern -const size_t TextBuffer::AddPatternRecognizer(const std::wstring_view regexString) +// - An interval tree containing the patterns found +PointTree TextBuffer::GetPatterns(const til::CoordType firstRow, const til::CoordType lastRow) { - ++_currentPatternId; - _idsAndPatterns.emplace(std::make_pair(_currentPatternId, regexString)); - return _currentPatternId; -} + PointTree::interval_vector intervals; -// Method Description: -// - Clears the patterns we know of and resets the pattern ID counter -void TextBuffer::ClearPatternRecognizers() noexcept -{ - _idsAndPatterns.clear(); - _currentPatternId = 0; + UErrorCode status = U_ZERO_ERROR; +#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47). + UText text = UTEXT_INITIALIZER; + UTextFromTextBuffer(&text, *this, firstRow, lastRow + 1, &status); + + if (!_urlRegex) + { + static constexpr std::wstring_view linkPattern{ LR"(\b(?:https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|$!:,.;]*[A-Za-z0-9+&@#/%=~_|$])" }; + _urlRegex = createURegularExpression(linkPattern, 0, &status); + assert(_urlRegex); + } + + uregex_setUText(_urlRegex, &text, &status); + + if (uregex_find(_urlRegex, -1, &status)) + { + do + { + const auto nativeIndexBeg = uregex_start64(_urlRegex, 0, &status); + const auto nativeIndexEnd = uregex_end64(_urlRegex, 0, &status); + const auto range = BufferRangeFromUText(&text, nativeIndexBeg, nativeIndexEnd); + intervals.push_back(PointTree::interval(range.start, range.end, 0)); + } while (uregex_findNext(_urlRegex, &status)); + } + + return PointTree{ std::move(intervals) }; } -// Method Description: -// - Copies the patterns the other buffer knows about into this one -// Arguments: -// - The other buffer -void TextBuffer::CopyPatterns(const TextBuffer& OtherBuffer) +// Searches through the entire (committed) text buffer for `needle` and returns the coordinates in absolute coordinates. +// The end coordinates of the returned ranges are considered inclusive. +std::vector TextBuffer::SearchText(const std::wstring_view& needle, bool caseInsensitive) const { - _idsAndPatterns = OtherBuffer._idsAndPatterns; - _currentPatternId = OtherBuffer._currentPatternId; + return SearchText(needle, caseInsensitive, 0, til::CoordTypeMax); } -// Method Description: -// - Finds patterns within the requested region of the text buffer -// Arguments: -// - The firstRow to start searching from -// - The lastRow to search -// Return value: -// - An interval tree containing the patterns found -PointTree TextBuffer::GetPatterns(const til::CoordType firstRow, const til::CoordType lastRow) const +// Searches through the given rows [rowBeg,rowEnd) for `needle` and returns the coordinates in absolute coordinates. +// While the end coordinates of the returned ranges are considered inclusive, the [rowBeg,rowEnd) range is half-open. +std::vector TextBuffer::SearchText(const std::wstring_view& needle, bool caseInsensitive, til::CoordType rowBeg, til::CoordType rowEnd) const { - PointTree::interval_vector intervals; + rowEnd = std::min(rowEnd, _estimateOffsetOfLastCommittedRow() + 1); - std::wstring concatAll; - const auto rowSize = GetRowByOffset(0).size(); - concatAll.reserve(gsl::narrow_cast(rowSize) * gsl::narrow_cast(lastRow - firstRow + 1)); + std::vector results; - // to deal with text that spans multiple lines, we will first concatenate - // all the text into one string and find the patterns in that string - for (til::CoordType i = firstRow; i <= lastRow; ++i) + // All whitespace strings would match the not-yet-written parts of the TextBuffer which would be weird. + if (allWhitespace(needle) || rowBeg >= rowEnd) { - auto& row = GetRowByOffset(i); - concatAll += row.GetText(); + return results; } - // for each pattern we know of, iterate through the string - for (const auto& idAndPattern : _idsAndPatterns) - { - std::wregex regexObj{ idAndPattern.second }; + uint32_t flags = UREGEX_LITERAL; + WI_SetFlagIf(flags, UREGEX_CASE_INSENSITIVE, caseInsensitive); - // search through the run with our regex object - auto words_begin = std::wsregex_iterator(concatAll.begin(), concatAll.end(), regexObj); - auto words_end = std::wsregex_iterator(); + UErrorCode status = U_ZERO_ERROR; +#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47). + UText text = UTEXT_INITIALIZER; + UTextFromTextBuffer(&text, *this, rowBeg, rowEnd, &status); - til::CoordType lenUpToThis = 0; - for (auto i = words_begin; i != words_end; ++i) + const unique_URegularExpression re{ createURegularExpression(needle, flags, &status) }; + uregex_setUText(re.get(), &text, &status); + + if (uregex_find(re.get(), -1, &status)) + { + do { - // record the locations - - // when we find a match, the prefix is text that is between this - // match and the previous match, so we use the size of the prefix - // along with the size of the match to determine the locations - til::CoordType prefixSize = 0; - for (const auto str = i->prefix().str(); const auto& glyph : til::utf16_iterator{ str }) - { - prefixSize += IsGlyphFullWidth(glyph) ? 2 : 1; - } - const auto start = lenUpToThis + prefixSize; - til::CoordType matchSize = 0; - for (const auto str = i->str(); const auto& glyph : til::utf16_iterator{ str }) - { - matchSize += IsGlyphFullWidth(glyph) ? 2 : 1; - } - const auto end = start + matchSize; - lenUpToThis = end; - - const til::point startCoord{ start % rowSize, start / rowSize }; - const til::point endCoord{ end % rowSize, end / rowSize }; - - // store the intervals - // NOTE: these intervals are relative to the VIEWPORT not the buffer - // Keeping these relative to the viewport for now because its the renderer - // that actually uses these locations and the renderer works relative to - // the viewport - intervals.push_back(PointTree::interval(startCoord, endCoord, idAndPattern.first)); - } + const auto nativeIndexBeg = uregex_start64(re.get(), 0, &status); + const auto nativeIndexEnd = uregex_end64(re.get(), 0, &status); + results.emplace_back(BufferRangeFromUText(&text, nativeIndexBeg, nativeIndexEnd)); + } while (uregex_findNext(re.get(), &status)); } - PointTree result(std::move(intervals)); - return result; + + return results; } const std::vector& TextBuffer::GetMarks() const noexcept diff --git a/src/buffer/out/textBuffer.hpp b/src/buffer/out/textBuffer.hpp index d4cf6f74214..23e7b77d254 100644 --- a/src/buffer/out/textBuffer.hpp +++ b/src/buffer/out/textBuffer.hpp @@ -59,6 +59,8 @@ filling in the last row, and updating the screen. #include "../buffer/out/textBufferCellIterator.hpp" #include "../buffer/out/textBufferTextIterator.hpp" +struct URegularExpression; + namespace Microsoft::Console::Render { class Renderer; @@ -122,7 +124,7 @@ class TextBuffer final ROW& GetScratchpadRow(); ROW& GetScratchpadRow(const TextAttribute& attributes); const ROW& GetRowByOffset(til::CoordType index) const; - ROW& GetRowByOffset(til::CoordType index); + ROW& GetMutableRowByOffset(til::CoordType index); TextBufferCellIterator GetCellDataAt(const til::point at) const; TextBufferCellIterator GetCellLineDataAt(const til::point at) const; @@ -164,6 +166,7 @@ class TextBuffer final Cursor& GetCursor() noexcept; const Cursor& GetCursor() const noexcept; + uint64_t GetMutationCount() const noexcept; const til::CoordType GetFirstRowIndex() const noexcept; const Microsoft::Console::Types::Viewport GetSize() const noexcept; @@ -262,10 +265,9 @@ class TextBuffer final const std::optional lastCharacterViewport, std::optional> positionInfo); - const size_t AddPatternRecognizer(const std::wstring_view regexString); - void ClearPatternRecognizers() noexcept; - void CopyPatterns(const TextBuffer& OtherBuffer); - interval_tree::IntervalTree GetPatterns(const til::CoordType firstRow, const til::CoordType lastRow) const; + interval_tree::IntervalTree GetPatterns(const til::CoordType firstRow, const til::CoordType lastRow); + std::vector SearchText(const std::wstring_view& needle, bool caseInsensitive) const; + std::vector SearchText(const std::wstring_view& needle, bool caseInsensitive, til::CoordType rowBeg, til::CoordType rowEnd) const; const std::vector& GetMarks() const noexcept; void ClearMarksInRange(const til::point start, const til::point end); @@ -285,6 +287,7 @@ class TextBuffer final void _construct(const std::byte* until) noexcept; void _destroy() const noexcept; ROW& _getRowByOffsetDirect(size_t offset); + ROW& _getRow(til::CoordType y) const; til::CoordType _estimateOffsetOfLastCommittedRow() const noexcept; void _SetFirstRowIndex(const til::CoordType FirstRowIndex) noexcept; @@ -293,7 +296,6 @@ class TextBuffer final void _AdjustWrapOnCurrentRow(const bool fSet); // Assist with maintaining proper buffer state for Double Byte character sequences void _PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute); - bool _AssertValidDoubleByteSequence(const DbcsAttribute dbcsAttribute); void _ExpandTextRow(til::inclusive_rect& selectionRow) const; DelimiterClass _GetDelimiterClassAt(const til::point pos, const std::wstring_view wordDelimiters) const; til::point _GetWordStartForAccessibility(const til::point target, const std::wstring_view wordDelimiters) const; @@ -311,9 +313,6 @@ class TextBuffer final std::unordered_map _hyperlinkCustomIdMap; uint16_t _currentHyperlinkId = 1; - std::unordered_map _idsAndPatterns; - size_t _currentPatternId = 0; - // This block describes the state of the underlying virtual memory buffer that holds all ROWs, text and attributes. // Initially memory is only allocated with MEM_RESERVE to reduce the private working set of conhost. // ROWs are laid out like this in memory: @@ -373,12 +372,15 @@ class TextBuffer final TextAttribute _currentAttributes; til::CoordType _firstRow = 0; // indexes top row (not necessarily 0) + uint64_t _mutationCount = 0; Cursor _cursor; - - bool _isActiveBuffer = false; - std::vector _marks; + // While safe RAII wrappers for URegularExpression* would improve maintainability, it would make + // it more difficult to avoid having to forward declare (or outright expose) more ICU stuff. + // This may make switching to other Unicode libraries easier in the distant future. + URegularExpression* _urlRegex = nullptr; + bool _isActiveBuffer = false; #ifdef UNIT_TESTING friend class TextBufferTests; diff --git a/src/cascadia/TerminalControl/ControlCore.cpp b/src/cascadia/TerminalControl/ControlCore.cpp index 87e98c0a1b7..c1d67d95d9e 100644 --- a/src/cascadia/TerminalControl/ControlCore.cpp +++ b/src/cascadia/TerminalControl/ControlCore.cpp @@ -1539,31 +1539,22 @@ namespace winrt::Microsoft::Terminal::Control::implementation // - caseSensitive: boolean that represents if the current search is case sensitive // Return Value: // - - void ControlCore::Search(const winrt::hstring& text, - const bool goForward, - const bool caseSensitive) + void ControlCore::Search(const winrt::hstring& text, const bool goForward, const bool caseSensitive) { - if (text.size() == 0) + auto lock = _terminal->LockForWriting(); + _terminal->SetBlockSelection(false); + + if (_searcher.IsStale() || _searcherText != text || _searcherGoForward != goForward || _searcherCaseSensitive != caseSensitive) { - return; + _searcher = { *GetRenderData(), text, !goForward, !caseSensitive }; + _searcherText = text; + _searcherGoForward = goForward; + _searcherCaseSensitive = caseSensitive; } - const auto direction = goForward ? - Search::Direction::Forward : - Search::Direction::Backward; - - const auto sensitivity = caseSensitive ? - Search::Sensitivity::CaseSensitive : - Search::Sensitivity::CaseInsensitive; - - ::Search search(*GetRenderData(), text.c_str(), direction, sensitivity); - auto lock = _terminal->LockForWriting(); - const auto foundMatch{ search.FindNext() }; + const auto foundMatch = _searcher.SelectNext(); if (foundMatch) { - _terminal->SetBlockSelection(false); - search.Select(); - // this is used for search, // DO NOT call _updateSelectionUI() here. // We don't want to show the markers so manually tell it to clear it. @@ -1573,8 +1564,12 @@ namespace winrt::Microsoft::Terminal::Control::implementation // Raise a FoundMatch event, which the control will use to notify // narrator if there was any results in the buffer - auto foundResults = winrt::make_self(foundMatch); - _FoundMatchHandlers(*this, *foundResults); + _FoundMatchHandlers(*this, winrt::make(foundMatch)); + } + + void ControlCore::ClearSearch() + { + _searcher = {}; } void ControlCore::Close() diff --git a/src/cascadia/TerminalControl/ControlCore.h b/src/cascadia/TerminalControl/ControlCore.h index 5f9f23a3e89..88b7c6d1ee1 100644 --- a/src/cascadia/TerminalControl/ControlCore.h +++ b/src/cascadia/TerminalControl/ControlCore.h @@ -205,9 +205,8 @@ namespace winrt::Microsoft::Terminal::Control::implementation void SetSelectionAnchor(const til::point position); void SetEndSelectionPoint(const til::point position); - void Search(const winrt::hstring& text, - const bool goForward, - const bool caseSensitive); + void Search(const winrt::hstring& text, const bool goForward, const bool caseSensitive); + void ClearSearch(); void LeftClickOnTerminal(const til::point terminalPosition, const int numberOfClicks, @@ -305,6 +304,11 @@ namespace winrt::Microsoft::Terminal::Control::implementation std::unique_ptr<::Microsoft::Console::Render::IRenderEngine> _renderEngine{ nullptr }; std::unique_ptr<::Microsoft::Console::Render::Renderer> _renderer{ nullptr }; + ::Search _searcher; + winrt::hstring _searcherText; + bool _searcherGoForward = false; + bool _searcherCaseSensitive = false; + winrt::handle _lastSwapChainHandle{ nullptr }; FontInfoDesired _desiredFont; diff --git a/src/cascadia/TerminalControl/ControlCore.idl b/src/cascadia/TerminalControl/ControlCore.idl index 8157346dcd1..0f1cc8be87f 100644 --- a/src/cascadia/TerminalControl/ControlCore.idl +++ b/src/cascadia/TerminalControl/ControlCore.idl @@ -128,6 +128,7 @@ namespace Microsoft.Terminal.Control void ResumeRendering(); void BlinkAttributeTick(); void Search(String text, Boolean goForward, Boolean caseSensitive); + void ClearSearch(); Microsoft.Terminal.Core.Color BackgroundColor { get; }; SelectionData SelectionInfo { get; }; diff --git a/src/cascadia/TerminalControl/TermControl.cpp b/src/cascadia/TerminalControl/TermControl.cpp index 2bce8ecc0d0..3f362ad6852 100644 --- a/src/cascadia/TerminalControl/TermControl.cpp +++ b/src/cascadia/TerminalControl/TermControl.cpp @@ -414,6 +414,7 @@ namespace winrt::Microsoft::Terminal::Control::implementation void TermControl::_CloseSearchBoxControl(const winrt::Windows::Foundation::IInspectable& /*sender*/, const RoutedEventArgs& /*args*/) { + _core.ClearSearch(); _searchBox->Visibility(Visibility::Collapsed); // Set focus back to terminal control diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index 75161e5c7f0..ada1bed5b62 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -111,7 +111,6 @@ void Terminal::UpdateSettings(ICoreSettings settings) if (_mainBuffer) { // Clear the patterns first - _mainBuffer->ClearPatternRecognizers(); _detectURLs = settings.DetectURLs(); _updateUrlDetection(); } @@ -1337,9 +1336,6 @@ void Terminal::_updateUrlDetection() { if (_detectURLs) { - // Add regex pattern recognizers to the buffer - // For now, we only add the URI regex pattern - _hyperlinkPatternId = _activeBuffer().AddPatternRecognizer(linkPattern); UpdatePatternsUnderLock(); } else @@ -1483,12 +1479,8 @@ void Terminal::ColorSelection(const TextAttribute& attr, winrt::Microsoft::Termi if (!text.empty()) { - Search search(*this, text, Search::Direction::Forward, Search::Sensitivity::CaseInsensitive, { 0, 0 }); - - while (search.FindNext()) - { - search.Color(attr); - } + const Search search(*this, text, false, true); + search.ColorAll(attr); } } } diff --git a/src/cascadia/TerminalCore/Terminal.hpp b/src/cascadia/TerminalCore/Terminal.hpp index 8e63819bdb6..a0110cfbfb9 100644 --- a/src/cascadia/TerminalCore/Terminal.hpp +++ b/src/cascadia/TerminalCore/Terminal.hpp @@ -17,7 +17,6 @@ #include -inline constexpr std::wstring_view linkPattern{ LR"(\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|$!:,.;]*[A-Za-z0-9+&@#/%=~_|$])" }; inline constexpr size_t TaskbarMinProgress{ 10 }; // You have to forward decl the ICoreSettings here, instead of including the header. diff --git a/src/cascadia/TerminalCore/TerminalApi.cpp b/src/cascadia/TerminalCore/TerminalApi.cpp index e19f158adf8..4c31cbc4bbc 100644 --- a/src/cascadia/TerminalCore/TerminalApi.cpp +++ b/src/cascadia/TerminalCore/TerminalApi.cpp @@ -193,7 +193,6 @@ void Terminal::UseAlternateScreenBuffer(const TextAttribute& attrs) const auto cursorSize = _mainBuffer->GetCursor().GetSize(); ClearSelection(); - _mainBuffer->ClearPatternRecognizers(); // Create a new alt buffer _altBuffer = std::make_unique(_altBufferSize, @@ -272,7 +271,6 @@ void Terminal::UseMainScreenBuffer() } // update all the hyperlinks on the screen - _mainBuffer->ClearPatternRecognizers(); _updateUrlDetection(); // GH#3321: Make sure we let the TerminalInput know that we switched diff --git a/src/host/_stream.cpp b/src/host/_stream.cpp index d85d4e105f8..cc858f74bef 100644 --- a/src/host/_stream.cpp +++ b/src/host/_stream.cpp @@ -247,7 +247,7 @@ try AdjustCursorPosition(screenInfo, pos, keepCursorVisible, psScrollY); const auto y = cursor.GetPosition().y; - auto& row = textBuffer.GetRowByOffset(y); + auto& row = textBuffer.GetMutableRowByOffset(y); pos.x = textBuffer.GetSize().RightExclusive(); pos.y = y; @@ -408,7 +408,7 @@ try pos.x = 0; } - textBuffer.GetRowByOffset(pos.y).SetWrapForced(false); + textBuffer.GetMutableRowByOffset(pos.y).SetWrapForced(false); pos.y = pos.y + 1; AdjustCursorPosition(screenInfo, pos, keepCursorVisible, psScrollY); continue; diff --git a/src/host/selectionInput.cpp b/src/host/selectionInput.cpp index d85c5835bd0..37ad00c96a6 100644 --- a/src/host/selectionInput.cpp +++ b/src/host/selectionInput.cpp @@ -708,11 +708,8 @@ bool Selection::_HandleColorSelection(const INPUT_KEY_INFO* const pInputKeyInfo) Telemetry::Instance().LogColorSelectionUsed(); - Search search(gci.renderData, str, Search::Direction::Forward, Search::Sensitivity::CaseInsensitive); - while (search.FindNext()) - { - search.Color(selectionAttr); - } + const Search search(gci.renderData, str, false, true); + search.ColorAll(selectionAttr); } } CATCH_LOG(); diff --git a/src/host/telemetry.cpp b/src/host/telemetry.cpp index c8318f118a0..21cff15e815 100644 --- a/src/host/telemetry.cpp +++ b/src/host/telemetry.cpp @@ -21,10 +21,6 @@ TRACELOGGING_DEFINE_PROVIDER(g_hConhostV2EventTraceProvider, // Disable 4351 so we can initialize the arrays to 0 without a warning. #pragma warning(disable : 4351) Telemetry::Telemetry() : - _fpFindStringLengthAverage(0), - _fpDirectionDownAverage(0), - _fpMatchCaseAverage(0), - _uiFindNextClickedTotal(0), _uiColorSelectionUsed(0), _tStartedAt(0), _wchProcessFileNames(), @@ -177,40 +173,6 @@ void Telemetry::LogApiCall(const ApiCall api) _rguiTimesApiUsed[api]++; } -// Log usage of the Find Dialog. -void Telemetry::LogFindDialogNextClicked(const unsigned int uiStringLength, const bool fDirectionDown, const bool fMatchCase) -{ - // Don't send telemetry for every time it's used, as this will help reduce the load on our servers. - // Instead just create a running average of the string length, the direction down radio - // button, and match case checkbox. - _fpFindStringLengthAverage = ((_fpFindStringLengthAverage * _uiFindNextClickedTotal + uiStringLength) / (_uiFindNextClickedTotal + 1)); - _fpDirectionDownAverage = ((_fpDirectionDownAverage * _uiFindNextClickedTotal + (fDirectionDown ? 1 : 0)) / (_uiFindNextClickedTotal + 1)); - _fpMatchCaseAverage = ((_fpMatchCaseAverage * _uiFindNextClickedTotal + (fMatchCase ? 1 : 0)) / (_uiFindNextClickedTotal + 1)); - _uiFindNextClickedTotal++; -} - -// Find dialog was closed, now send out the telemetry. -void Telemetry::FindDialogClosed() -{ - // clang-format off -#pragma prefast(suppress: __WARNING_NONCONST_LOCAL, "Activity can't be const, since it's set to a random value on startup.") - // clang-format on - TraceLoggingWriteTagged(_activity, - "FindDialogUsed", - TraceLoggingValue(_fpFindStringLengthAverage, "StringLengthAverage"), - TraceLoggingValue(_fpDirectionDownAverage, "DirectionDownAverage"), - TraceLoggingValue(_fpMatchCaseAverage, "MatchCaseAverage"), - TraceLoggingValue(_uiFindNextClickedTotal, "FindNextButtonClickedTotal"), - TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), - TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage)); - - // Get ready for the next time the dialog is used. - _fpFindStringLengthAverage = 0; - _fpDirectionDownAverage = 0; - _fpMatchCaseAverage = 0; - _uiFindNextClickedTotal = 0; -} - // Tries to find the process name amongst our previous process names by doing a binary search. // The main difference between this and the standard bsearch library call, is that if this // can't find the string, it returns the position the new string should be inserted at. This saves diff --git a/src/host/telemetry.hpp b/src/host/telemetry.hpp index 45cbea796a4..4cb2560a3b5 100644 --- a/src/host/telemetry.hpp +++ b/src/host/telemetry.hpp @@ -44,9 +44,7 @@ class Telemetry void LogQuickEditPasteRawUsed(); void LogColorSelectionUsed(); - void LogFindDialogNextClicked(const unsigned int iStringLength, const bool fDirectionDown, const bool fMatchCase); void LogProcessConnected(const HANDLE hProcess); - void FindDialogClosed(); void WriteFinalTraceLog(); void LogRipMessage(_In_z_ const char* pszMessage, ...) const; @@ -138,10 +136,6 @@ class Telemetry TraceLoggingActivity _activity; - float _fpFindStringLengthAverage; - float _fpDirectionDownAverage; - float _fpMatchCaseAverage; - unsigned int _uiFindNextClickedTotal; unsigned int _uiColorSelectionUsed; time_t _tStartedAt; WCHAR const* const c_pwszBashExeName = L"bash.exe"; diff --git a/src/inc/til/at.h b/src/inc/til/at.h index 9b7c804e839..bb1596a4d69 100644 --- a/src/inc/til/at.h +++ b/src/inc/til/at.h @@ -16,6 +16,7 @@ namespace til template constexpr auto at(T&& cont, const I i) noexcept -> decltype(auto) { +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). #pragma warning(suppress : 26482) // Suppress bounds.2 check for indexing with constant expressions #pragma warning(suppress : 26446) // Suppress bounds.4 check for subscript operator. #pragma warning(suppress : 26445) // Suppress lifetime check for a reference to std::span or std::string_view diff --git a/src/interactivity/win32/find.cpp b/src/interactivity/win32/find.cpp index 896c5885a1b..a0d949130b7 100644 --- a/src/interactivity/win32/find.cpp +++ b/src/interactivity/win32/find.cpp @@ -22,17 +22,18 @@ INT_PTR CALLBACK FindDialogProc(HWND hWnd, UINT Message, WPARAM wParam, LPARAM l auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); // This bool is used to track which option - up or down - was used to perform the last search. That way, the next time the // find dialog is opened, it will default to the last used option. - static auto fFindSearchUp = true; + static auto reverse = true; + static auto caseInsensitive = true; static std::wstring lastFindString; + static Search searcher; - WCHAR szBuf[SEARCH_STRING_LENGTH + 1]; switch (Message) { case WM_INITDIALOG: SetWindowLongPtrW(hWnd, DWLP_USER, lParam); - SendDlgItemMessageW(hWnd, ID_CONSOLE_FINDSTR, EM_LIMITTEXT, ARRAYSIZE(szBuf) - 1, 0); - CheckRadioButton(hWnd, ID_CONSOLE_FINDUP, ID_CONSOLE_FINDDOWN, (fFindSearchUp ? ID_CONSOLE_FINDUP : ID_CONSOLE_FINDDOWN)); - SetDlgItemText(hWnd, ID_CONSOLE_FINDSTR, lastFindString.c_str()); + CheckRadioButton(hWnd, ID_CONSOLE_FINDUP, ID_CONSOLE_FINDDOWN, (reverse ? ID_CONSOLE_FINDUP : ID_CONSOLE_FINDDOWN)); + CheckDlgButton(hWnd, ID_CONSOLE_FINDCASE, !caseInsensitive); + SetDlgItemTextW(hWnd, ID_CONSOLE_FINDSTR, lastFindString.c_str()); return TRUE; case WM_COMMAND: { @@ -40,44 +41,49 @@ INT_PTR CALLBACK FindDialogProc(HWND hWnd, UINT Message, WPARAM wParam, LPARAM l { case IDOK: { - const auto StringLength = (USHORT)GetDlgItemTextW(hWnd, ID_CONSOLE_FINDSTR, szBuf, ARRAYSIZE(szBuf)); - if (StringLength == 0) - { - lastFindString.clear(); - break; - } - const auto IgnoreCase = IsDlgButtonChecked(hWnd, ID_CONSOLE_FINDCASE) == 0; - const auto Reverse = IsDlgButtonChecked(hWnd, ID_CONSOLE_FINDDOWN) == 0; - fFindSearchUp = !!Reverse; - auto& ScreenInfo = gci.GetActiveOutputBuffer(); - - std::wstring wstr(szBuf, StringLength); - lastFindString = wstr; LockConsole(); auto Unlock = wil::scope_exit([&] { UnlockConsole(); }); - Search search(gci.renderData, - wstr, - Reverse ? Search::Direction::Backward : Search::Direction::Forward, - IgnoreCase ? Search::Sensitivity::CaseInsensitive : Search::Sensitivity::CaseSensitive); - - if (search.FindNext()) + if (searcher.IsStale()) { - Telemetry::Instance().LogFindDialogNextClicked(StringLength, (Reverse != 0), (IgnoreCase == 0)); - search.Select(); - return TRUE; + auto length = SendDlgItemMessageW(hWnd, ID_CONSOLE_FINDSTR, WM_GETTEXTLENGTH, 0, 0); + lastFindString.resize(length); + length = GetDlgItemTextW(hWnd, ID_CONSOLE_FINDSTR, lastFindString.data(), gsl::narrow_cast(length + 1)); + lastFindString.resize(length); + + caseInsensitive = IsDlgButtonChecked(hWnd, ID_CONSOLE_FINDCASE) == 0; + reverse = IsDlgButtonChecked(hWnd, ID_CONSOLE_FINDDOWN) == 0; + + searcher = Search{ gci.renderData, lastFindString, reverse, caseInsensitive }; } - else + + if (searcher.SelectNext()) { - // The string wasn't found. - ScreenInfo.SendNotifyBeep(); + return TRUE; } + + std::ignore = gci.GetActiveOutputBuffer().SendNotifyBeep(); break; } case IDCANCEL: - Telemetry::Instance().FindDialogClosed(); EndDialog(hWnd, 0); + searcher = Search{}; return TRUE; + case ID_CONSOLE_FINDSTR: + case ID_CONSOLE_FINDCASE: + case ID_CONSOLE_FINDUP: + case ID_CONSOLE_FINDDOWN: + { + const auto hi = HIWORD(wParam); + // ID_CONSOLE_FINDSTR emits EN_CHANGE and the other 3 emit 0 when changing. + if (hi == 0 || hi == EN_CHANGE) + { + searcher = Search{}; + } + break; + } + default: + break; } break; } diff --git a/src/terminal/adapter/adaptDispatch.cpp b/src/terminal/adapter/adaptDispatch.cpp index 92af043e4f6..d49a5b816b7 100644 --- a/src/terminal/adapter/adaptDispatch.cpp +++ b/src/terminal/adapter/adaptDispatch.cpp @@ -894,7 +894,7 @@ void AdaptDispatch::_SelectiveEraseRect(TextBuffer& textBuffer, const til::rect& { for (auto row = eraseRect.top; row < eraseRect.bottom; row++) { - auto& rowBuffer = textBuffer.GetRowByOffset(row); + auto& rowBuffer = textBuffer.GetMutableRowByOffset(row); for (auto col = eraseRect.left; col < eraseRect.right; col++) { // Only unprotected cells are affected. @@ -996,7 +996,7 @@ void AdaptDispatch::_ChangeRectAttributes(TextBuffer& textBuffer, const til::rec { for (auto row = changeRect.top; row < changeRect.bottom; row++) { - auto& rowBuffer = textBuffer.GetRowByOffset(row); + auto& rowBuffer = textBuffer.GetMutableRowByOffset(row); for (auto col = changeRect.left; col < changeRect.right; col++) { auto attr = rowBuffer.GetAttrByColumn(col); @@ -2407,7 +2407,7 @@ void AdaptDispatch::_DoLineFeed(TextBuffer& textBuffer, const bool withReturn, c // If the line was forced to wrap, set the wrap status. // When explicitly moving down a row, clear the wrap status. - textBuffer.GetRowByOffset(currentPosition.y).SetWrapForced(wrapForced); + textBuffer.GetMutableRowByOffset(currentPosition.y).SetWrapForced(wrapForced); // If a carriage return was requested, we move to the leftmost column or // the left margin, depending on whether we started within the margins. @@ -2453,7 +2453,7 @@ void AdaptDispatch::_DoLineFeed(TextBuffer& textBuffer, const bool withReturn, c else { const auto eraseAttributes = _GetEraseAttributes(textBuffer); - textBuffer.GetRowByOffset(newPosition.y).Reset(eraseAttributes); + textBuffer.GetMutableRowByOffset(newPosition.y).Reset(eraseAttributes); } } else diff --git a/src/types/UiaTextRangeBase.cpp b/src/types/UiaTextRangeBase.cpp index da6352358e8..56fa391aeff 100644 --- a/src/types/UiaTextRangeBase.cpp +++ b/src/types/UiaTextRangeBase.cpp @@ -450,7 +450,7 @@ try // Technically, we'll truncate early if there's an embedded null in the BSTR. // But we're probably fine in this circumstance. - const std::wstring queryFontName{ val.bstrVal }; + const std::wstring_view queryFontName{ val.bstrVal, SysStringLen(val.bstrVal) }; if (queryFontName == _pData->GetFontInfo().GetFaceName()) { Clone(ppRetVal); @@ -608,45 +608,55 @@ try }); RETURN_HR_IF(E_FAIL, !_pData->IsUiaDataInitialized()); - const std::wstring queryText{ text, SysStringLen(text) }; - const auto bufferSize = _getOptimizedBufferSize(); - const auto sensitivity = ignoreCase ? Search::Sensitivity::CaseInsensitive : Search::Sensitivity::CaseSensitive; - - auto searchDirection = Search::Direction::Forward; - auto searchAnchor = _start; - if (searchBackward) + const std::wstring_view queryText{ text, SysStringLen(text) }; + const auto results = _pData->GetTextBuffer().SearchText(queryText, ignoreCase != FALSE, _start.y, _end.y + 1); + if (results.empty()) { - searchDirection = Search::Direction::Backward; - - // we need to convert the end to inclusive - // because Search operates with an inclusive til::point - searchAnchor = _end; - bufferSize.DecrementInBounds(searchAnchor, true); + return S_OK; } - Search searcher{ *_pData, queryText, searchDirection, sensitivity, searchAnchor }; + const auto highestIndex = results.size() - 1; + const til::point_span* hit = nullptr; - if (searcher.FindNext()) + if (searchBackward) { - const auto foundLocation = searcher.GetFoundLocation(); - const auto start = foundLocation.first; - - // we need to increment the position of end because it's exclusive - auto end = foundLocation.second; - bufferSize.IncrementInBounds(end, true); + for (size_t i = highestIndex;; --i) + { + hit = &til::at(results, i); + if (hit->start < _end) + { + break; + } - // make sure what was found is within the bounds of the current range - if ((searchDirection == Search::Direction::Forward && end < _end) || - (searchDirection == Search::Direction::Backward && start > _start)) + if (i <= 0) + { + return S_OK; + } + } + } + else + { + for (size_t i = 0;; ++i) { - RETURN_IF_FAILED(Clone(ppRetVal)); - auto& range = static_cast(**ppRetVal); - range._start = start; - range._end = end; + hit = &til::at(results, i); + if (hit->start >= _start) + { + break; + } - UiaTracing::TextRange::FindText(*this, queryText, searchBackward, ignoreCase, range); + if (i >= highestIndex) + { + return S_OK; + } } } + + RETURN_IF_FAILED(Clone(ppRetVal)); + auto& range = static_cast(**ppRetVal); + range._start = hit->start; + range._end = hit->end; + + UiaTracing::TextRange::FindText(*this, queryText, searchBackward, ignoreCase, range); return S_OK; } CATCH_RETURN(); diff --git a/src/types/UiaTracing.cpp b/src/types/UiaTracing.cpp index 9a04103448e..dae05a073b9 100644 --- a/src/types/UiaTracing.cpp +++ b/src/types/UiaTracing.cpp @@ -63,14 +63,14 @@ UiaTracing::~UiaTracing() noexcept TraceLoggingUnregister(g_UiaProviderTraceProvider); } -inline std::wstring UiaTracing::_getValue(const ScreenInfoUiaProviderBase& siup) noexcept +std::wstring UiaTracing::_getValue(const ScreenInfoUiaProviderBase& siup) noexcept { std::wstringstream stream; stream << "_id: " << siup.GetId(); return stream.str(); } -inline std::wstring UiaTracing::_getValue(const UiaTextRangeBase& utr) noexcept +std::wstring UiaTracing::_getValue(const UiaTextRangeBase& utr) noexcept try { const auto start = utr.GetEndpoint(TextPatternRangeEndpoint_Start); @@ -90,7 +90,7 @@ catch (...) return {}; } -inline std::wstring UiaTracing::_getValue(const TextPatternRangeEndpoint endpoint) noexcept +std::wstring UiaTracing::_getValue(const TextPatternRangeEndpoint endpoint) noexcept { switch (endpoint) { @@ -103,7 +103,7 @@ inline std::wstring UiaTracing::_getValue(const TextPatternRangeEndpoint endpoin } } -inline std::wstring UiaTracing::_getValue(const TextUnit unit) noexcept +std::wstring UiaTracing::_getValue(const TextUnit unit) noexcept { switch (unit) { @@ -126,7 +126,7 @@ inline std::wstring UiaTracing::_getValue(const TextUnit unit) noexcept } } -inline std::wstring UiaTracing::_getValue(const VARIANT val) noexcept +std::wstring UiaTracing::_getValue(const VARIANT val) noexcept { // This is not a comprehensive conversion of VARIANT result to string // We're only including the one's we need at this time. @@ -148,7 +148,7 @@ inline std::wstring UiaTracing::_getValue(const VARIANT val) noexcept } } -inline std::wstring UiaTracing::_getValue(const AttributeType attrType) noexcept +std::wstring UiaTracing::_getValue(const AttributeType attrType) noexcept { switch (attrType) { @@ -263,7 +263,7 @@ void UiaTracing::TextRange::FindAttribute(const UiaTextRangeBase& utr, TEXTATTRI } } -void UiaTracing::TextRange::FindText(const UiaTextRangeBase& base, std::wstring text, bool searchBackward, bool ignoreCase, const UiaTextRangeBase& result) noexcept +void UiaTracing::TextRange::FindText(const UiaTextRangeBase& base, const std::wstring_view& text, bool searchBackward, bool ignoreCase, const UiaTextRangeBase& result) noexcept { EnsureRegistration(); if (TraceLoggingProviderEnabled(g_UiaProviderTraceProvider, WINEVENT_LEVEL_VERBOSE, TIL_KEYWORD_TRACE)) @@ -272,7 +272,7 @@ void UiaTracing::TextRange::FindText(const UiaTextRangeBase& base, std::wstring g_UiaProviderTraceProvider, "UiaTextRange::FindText", TraceLoggingValue(_getValue(base).c_str(), "base"), - TraceLoggingValue(text.c_str(), "text"), + TraceLoggingCountedWideString(text.data(), (ULONG)text.size(), "text"), TraceLoggingValue(searchBackward, "searchBackward"), TraceLoggingValue(ignoreCase, "ignoreCase"), TraceLoggingValue(_getValue(result).c_str(), "result"), @@ -326,7 +326,7 @@ void UiaTracing::TextRange::GetEnclosingElement(const UiaTextRangeBase& utr) noe } } -void UiaTracing::TextRange::GetText(const UiaTextRangeBase& utr, int maxLength, std::wstring result) noexcept +void UiaTracing::TextRange::GetText(const UiaTextRangeBase& utr, int maxLength, const std::wstring_view& result) noexcept { EnsureRegistration(); if (TraceLoggingProviderEnabled(g_UiaProviderTraceProvider, WINEVENT_LEVEL_VERBOSE, TIL_KEYWORD_TRACE)) @@ -336,7 +336,7 @@ void UiaTracing::TextRange::GetText(const UiaTextRangeBase& utr, int maxLength, "UiaTextRange::GetText", TraceLoggingValue(_getValue(utr).c_str(), "base"), TraceLoggingValue(maxLength, "maxLength"), - TraceLoggingValue(result.c_str(), "result"), + TraceLoggingCountedWideString(result.data(), (ULONG)result.size(), "result"), TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), TraceLoggingKeyword(TIL_KEYWORD_TRACE)); } diff --git a/src/types/UiaTracing.h b/src/types/UiaTracing.h index f9752caf480..58c95231a10 100644 --- a/src/types/UiaTracing.h +++ b/src/types/UiaTracing.h @@ -46,11 +46,11 @@ namespace Microsoft::Console::Types static void CompareEndpoints(const UiaTextRangeBase& base, const TextPatternRangeEndpoint endpoint, const UiaTextRangeBase& other, TextPatternRangeEndpoint otherEndpoint, int result) noexcept; static void ExpandToEnclosingUnit(TextUnit unit, const UiaTextRangeBase& result) noexcept; static void FindAttribute(const UiaTextRangeBase& base, TEXTATTRIBUTEID attributeId, VARIANT val, BOOL searchBackwards, const UiaTextRangeBase& result, AttributeType attrType = AttributeType::Standard) noexcept; - static void FindText(const UiaTextRangeBase& base, std::wstring text, bool searchBackward, bool ignoreCase, const UiaTextRangeBase& result) noexcept; + static void FindText(const UiaTextRangeBase& base, const std::wstring_view& text, bool searchBackward, bool ignoreCase, const UiaTextRangeBase& result) noexcept; static void GetAttributeValue(const UiaTextRangeBase& base, TEXTATTRIBUTEID id, VARIANT result, AttributeType attrType = AttributeType::Standard) noexcept; static void GetBoundingRectangles(const UiaTextRangeBase& base) noexcept; static void GetEnclosingElement(const UiaTextRangeBase& base) noexcept; - static void GetText(const UiaTextRangeBase& base, int maxLength, std::wstring result) noexcept; + static void GetText(const UiaTextRangeBase& base, int maxLength, const std::wstring_view& result) noexcept; static void Move(TextUnit unit, int count, int resultCount, const UiaTextRangeBase& result) noexcept; static void MoveEndpointByUnit(TextPatternRangeEndpoint endpoint, TextUnit unit, int count, int resultCount, const UiaTextRangeBase& result) noexcept; static void MoveEndpointByRange(TextPatternRangeEndpoint endpoint, const UiaTextRangeBase& other, TextPatternRangeEndpoint otherEndpoint, const UiaTextRangeBase& result) noexcept; @@ -104,13 +104,13 @@ namespace Microsoft::Console::Types UiaTracing& operator=(const UiaTracing&) = delete; UiaTracing& operator=(UiaTracing&&) = delete; - static inline std::wstring _getValue(const ScreenInfoUiaProviderBase& siup) noexcept; - static inline std::wstring _getValue(const UiaTextRangeBase& utr) noexcept; - static inline std::wstring _getValue(const TextPatternRangeEndpoint endpoint) noexcept; - static inline std::wstring _getValue(const TextUnit unit) noexcept; + static std::wstring _getValue(const ScreenInfoUiaProviderBase& siup) noexcept; + static std::wstring _getValue(const UiaTextRangeBase& utr) noexcept; + static std::wstring _getValue(const TextPatternRangeEndpoint endpoint) noexcept; + static std::wstring _getValue(const TextUnit unit) noexcept; - static inline std::wstring _getValue(const AttributeType attrType) noexcept; - static inline std::wstring _getValue(const VARIANT val) noexcept; + static std::wstring _getValue(const AttributeType attrType) noexcept; + static std::wstring _getValue(const VARIANT val) noexcept; // these are used to assign IDs to new UiaTextRanges and ScreenInfoUiaProviders respectively static IdType _utrId; From a346a2a60a836240fee62ec6002e3ceb9ebf967e Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 21 Aug 2023 14:56:12 +0200 Subject: [PATCH 02/22] Improve documentation --- src/buffer/out/Row.cpp | 8 +++-- src/buffer/out/Row.hpp | 4 +-- src/buffer/out/UTextAdapter.cpp | 61 ++++++++++++++++++++++++++++++--- 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index c933de6950e..467b7de8d27 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -90,6 +90,8 @@ CharToColumnMapper::CharToColumnMapper(const wchar_t* chars, const uint16_t* cha { } +// If given a position (`offset`) inside the ROW's text, this function will return the corresponding column. +// This function in particular returns the glyph's first column. til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t offset) noexcept { offset = offset < 0 ? 0 : (offset > _lastCharOffset ? _lastCharOffset : offset); @@ -137,6 +139,8 @@ til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t offset) noexcept return col; } +// If given a position (`offset`) inside the ROW's text, this function will return the corresponding column. +// This function in particular returns the glyph's last column (this matters for wide glyphs). til::CoordType CharToColumnMapper::GetTrailingColumnAt(ptrdiff_t offset) noexcept { auto col = GetLeadingColumnAt(offset); @@ -1027,13 +1031,13 @@ std::wstring_view ROW::GetText(til::CoordType columnBegin, til::CoordType column return { _chars.data() + chBeg, chEnd - chBeg }; } -til::CoordType ROW::GetLeftAlignedColumnAtChar(const ptrdiff_t offset) const noexcept +til::CoordType ROW::GetLeadingColumnAtCharOffset(const ptrdiff_t offset) const noexcept { auto mapper = _createCharToColumnMapper(offset); return mapper.GetLeadingColumnAt(offset); } -til::CoordType ROW::GetRightAlignedColumnAtChar(const ptrdiff_t offset) const noexcept +til::CoordType ROW::GetTrailingColumnAtCharOffset(const ptrdiff_t offset) const noexcept { auto mapper = _createCharToColumnMapper(offset); return mapper.GetTrailingColumnAt(offset); diff --git a/src/buffer/out/Row.hpp b/src/buffer/out/Row.hpp index 379b1856bcc..586aa12b95b 100644 --- a/src/buffer/out/Row.hpp +++ b/src/buffer/out/Row.hpp @@ -157,8 +157,8 @@ class ROW final DbcsAttribute DbcsAttrAt(til::CoordType column) const noexcept; std::wstring_view GetText() const noexcept; std::wstring_view GetText(til::CoordType columnBegin, til::CoordType columnEnd) const noexcept; - til::CoordType GetLeftAlignedColumnAtChar(ptrdiff_t offset) const noexcept; - til::CoordType GetRightAlignedColumnAtChar(ptrdiff_t offset) const noexcept; + til::CoordType GetLeadingColumnAtCharOffset(ptrdiff_t offset) const noexcept; + til::CoordType GetTrailingColumnAtCharOffset(ptrdiff_t offset) const noexcept; DelimiterClass DelimiterClassAt(til::CoordType column, const std::wstring_view& wordDelimiters) const noexcept; auto AttrBegin() const noexcept { return _attr.begin(); } diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index d23d883b336..f51f496b44b 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -27,6 +27,24 @@ constexpr til::CoordType& accessCurrentRow(UText* ut) noexcept return ut->b; } +// An excerpt from the ICU documentation: +// +// Clone a UText. Much like opening a UText where the source text is itself another UText. +// +// A shallow clone replicates only the UText data structures; it does not make +// a copy of the underlying text. Shallow clones can be used as an efficient way to +// have multiple iterators active in a single text string that is not being modified. +// +// A shallow clone operation must not fail except for truly exceptional conditions such +// as memory allocation failures. +// +// @param dest A UText struct to be filled in with the result of the clone operation, +// or NULL if the clone function should heap-allocate a new UText struct. +// @param src The UText to be cloned. +// @param deep true to request a deep clone, false for a shallow clone. +// @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR should +// be returned if the text provider is unable to clone the original text. +// @return The newly created clone, or NULL if the clone operation failed. static UText* U_CALLCONV utextClone(UText* dest, const UText* src, UBool deep, UErrorCode* status) noexcept { __assume(status != nullptr); @@ -46,7 +64,13 @@ static UText* U_CALLCONV utextClone(UText* dest, const UText* src, UBool deep, U return dest; } -static int64_t U_CALLCONV utextLength(UText* ut) noexcept +// An excerpt from the ICU documentation: +// +// Gets the length of the text. +// +// @param ut the UText to get the length of. +// @return the length, in the native units of the original text string. +static int64_t U_CALLCONV utextNativeLength(UText* ut) noexcept try { auto length = accessLength(ut); @@ -71,6 +95,19 @@ catch (...) return 0; } +// An excerpt from the ICU documentation: +// +// Get the description of the text chunk containing the text at a requested native index. +// The UText's iteration position will be left at the requested index. +// If the index is out of bounds, the iteration position will be left +// at the start or end of the string, as appropriate. +// +// @param ut the UText being accessed. +// @param nativeIndex Requested index of the text to be accessed. +// @param forward If true, then the returned chunk must contain text starting from the index, so that start<=indexchunkNativeStart); + ret.start.x = textBuffer.GetRowByOffset(y).GetLeadingColumnAtCharOffset(nativeIndexBeg - ut->chunkNativeStart); ret.start.y = y; } else @@ -250,7 +303,7 @@ til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t if (utextAccess(ut, nativeIndexEnd, true)) { const auto y = accessCurrentRow(ut); - ret.end.x = textBuffer.GetRowByOffset(y).GetLeftAlignedColumnAtChar(nativeIndexEnd - ut->chunkNativeStart); + ret.end.x = textBuffer.GetRowByOffset(y).GetTrailingColumnAtCharOffset(nativeIndexEnd - ut->chunkNativeStart); ret.end.y = y; } else From dba1629d06e132184817ed7757afe891a2f8fec8 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 21 Aug 2023 14:59:18 +0200 Subject: [PATCH 03/22] Spelling fix --- .github/actions/spelling/expect/expect.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 3a69241e2fd..acdb6a50b6e 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -1440,6 +1440,7 @@ prc prealigned prect prefast +preflighting prefs prepopulated presorted @@ -1953,6 +1954,7 @@ UAX UBool ucd uch +UChars udk UDM uer From 8146ecdffe8e75ef137b98f4480d0c8b8588bafc Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 21 Aug 2023 15:15:16 +0200 Subject: [PATCH 04/22] Fix a bug and x86 woes --- src/buffer/out/UTextAdapter.cpp | 6 +++--- src/types/UiaTextRangeBase.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index f51f496b44b..41f9e799ae8 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -231,7 +231,7 @@ try const auto& textBuffer = *static_cast(ut->context); const auto y = accessCurrentRow(ut); const auto offset = ut->chunkNativeStart - nativeStart; - const auto text = textBuffer.GetRowByOffset(y).GetText().substr(offset); + const auto text = textBuffer.GetRowByOffset(y).GetText().substr(gsl::narrow_cast(std::max(0, offset))); const auto length = std::min(gsl::narrow_cast(destCapacity), text.size()); memcpy(dest, text.data(), length * sizeof(char16_t)); @@ -292,7 +292,7 @@ til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t if (utextAccess(ut, nativeIndexBeg, true)) { const auto y = accessCurrentRow(ut); - ret.start.x = textBuffer.GetRowByOffset(y).GetLeadingColumnAtCharOffset(nativeIndexBeg - ut->chunkNativeStart); + ret.start.x = textBuffer.GetRowByOffset(y).GetLeadingColumnAtCharOffset(gsl::narrow_cast(nativeIndexBeg - ut->chunkNativeStart)); ret.start.y = y; } else @@ -303,7 +303,7 @@ til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t if (utextAccess(ut, nativeIndexEnd, true)) { const auto y = accessCurrentRow(ut); - ret.end.x = textBuffer.GetRowByOffset(y).GetTrailingColumnAtCharOffset(nativeIndexEnd - ut->chunkNativeStart); + ret.end.x = textBuffer.GetRowByOffset(y).GetTrailingColumnAtCharOffset(gsl::narrow_cast(nativeIndexEnd - ut->chunkNativeStart)); ret.end.y = y; } else diff --git a/src/types/UiaTextRangeBase.cpp b/src/types/UiaTextRangeBase.cpp index 56fa391aeff..f1175f34c2d 100644 --- a/src/types/UiaTextRangeBase.cpp +++ b/src/types/UiaTextRangeBase.cpp @@ -623,7 +623,7 @@ try for (size_t i = highestIndex;; --i) { hit = &til::at(results, i); - if (hit->start < _end) + if (hit->end < _end) { break; } From 5ca26aa3dd0f92a407bec8c8166080d578a13d3f Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 21 Aug 2023 15:32:19 +0200 Subject: [PATCH 05/22] More x86 woes, Improve UiaTextRangeBase --- src/buffer/out/UTextAdapter.cpp | 5 +++-- src/types/UiaTextRangeBase.cpp | 23 ++++++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index 41f9e799ae8..a929c1e7594 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -232,11 +232,12 @@ try const auto y = accessCurrentRow(ut); const auto offset = ut->chunkNativeStart - nativeStart; const auto text = textBuffer.GetRowByOffset(y).GetText().substr(gsl::narrow_cast(std::max(0, offset))); - const auto length = std::min(gsl::narrow_cast(destCapacity), text.size()); + const auto destCapacitySizeT = gsl::narrow_cast(destCapacity); + const auto length = std::min(destCapacitySizeT, text.size()); memcpy(dest, text.data(), length * sizeof(char16_t)); - if (length < destCapacity) + if (length < destCapacitySizeT) { #pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). dest[length] = 0; diff --git a/src/types/UiaTextRangeBase.cpp b/src/types/UiaTextRangeBase.cpp index f1175f34c2d..e3f9a0dde62 100644 --- a/src/types/UiaTextRangeBase.cpp +++ b/src/types/UiaTextRangeBase.cpp @@ -620,6 +620,7 @@ try if (searchBackward) { + // Find the last result that is in the [_start,_end) range. for (size_t i = highestIndex;; --i) { hit = &til::at(results, i); @@ -628,6 +629,7 @@ try break; } + // ...but exit when none are found. if (i <= 0) { return S_OK; @@ -636,6 +638,7 @@ try } else { + // Find the first result that is in the [_start,_end) range. for (size_t i = 0;; ++i) { hit = &til::at(results, i); @@ -644,6 +647,7 @@ try break; } + // ...but exit when none are found. if (i >= highestIndex) { return S_OK; @@ -651,12 +655,21 @@ try } } - RETURN_IF_FAILED(Clone(ppRetVal)); - auto& range = static_cast(**ppRetVal); - range._start = hit->start; - range._end = hit->end; + const auto start = hit->start; + auto end = hit->end; + + // we need to increment the position of end because it's exclusive + _getOptimizedBufferSize().IncrementInBounds(end, true); + + if (start >= _start && end <= _end) + { + RETURN_IF_FAILED(Clone(ppRetVal)); + auto& range = static_cast(**ppRetVal); + range._start = start; + range._end = end; + UiaTracing::TextRange::FindText(*this, queryText, searchBackward, ignoreCase, range); + } - UiaTracing::TextRange::FindText(*this, queryText, searchBackward, ignoreCase, range); return S_OK; } CATCH_RETURN(); From e0fb764c05fa5d52f397845d6a247c48a02612a0 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 21 Aug 2023 16:21:23 +0200 Subject: [PATCH 06/22] Fix unit tests --- src/buffer/out/search.h | 4 - src/buffer/out/ut_textbuffer/ReflowTests.cpp | 2 +- src/host/selection.cpp | 18 +++-- src/host/ut_host/ScreenBufferTests.cpp | 2 +- src/host/ut_host/SearchTests.cpp | 78 ++++++++----------- src/host/ut_host/TextBufferTests.cpp | 36 ++++----- src/inc/test/CommonState.hpp | 2 +- .../UiaTextRangeTests.cpp | 4 +- 8 files changed, 68 insertions(+), 78 deletions(-) diff --git a/src/buffer/out/search.h b/src/buffer/out/search.h index b8361188b16..67a75ba3e45 100644 --- a/src/buffer/out/search.h +++ b/src/buffer/out/search.h @@ -39,8 +39,4 @@ class Search final ptrdiff_t _index = 0; ptrdiff_t _step = 0; uint64_t _mutationCount = 0; - -#ifdef UNIT_TESTING - friend class SearchTests; -#endif }; diff --git a/src/buffer/out/ut_textbuffer/ReflowTests.cpp b/src/buffer/out/ut_textbuffer/ReflowTests.cpp index 2d4a5c4a1e7..3c99a9fb583 100644 --- a/src/buffer/out/ut_textbuffer/ReflowTests.cpp +++ b/src/buffer/out/ut_textbuffer/ReflowTests.cpp @@ -739,7 +739,7 @@ class ReflowTests til::CoordType y = 0; for (const auto& testRow : testBuffer.rows) { - auto& row{ buffer->GetRowByOffset(y) }; + auto& row{ buffer->GetMutableRowByOffset(y) }; row.SetWrapForced(testRow.wrap); diff --git a/src/host/selection.cpp b/src/host/selection.cpp index 88b00fd45de..97145a6013a 100644 --- a/src/host/selection.cpp +++ b/src/host/selection.cpp @@ -104,7 +104,10 @@ void Selection::_SetSelectionVisibility(const bool fMakeVisible) _PaintSelection(); } - LOG_IF_FAILED(ServiceLocator::LocateConsoleWindow()->SignalUia(UIA_Text_TextSelectionChangedEventId)); + if (const auto window = ServiceLocator::LocateConsoleWindow()) + { + LOG_IF_FAILED(window->SignalUia(UIA_Text_TextSelectionChangedEventId)); + } } // Routine Description: @@ -269,12 +272,14 @@ void Selection::ExtendSelection(_In_ til::point coordBufferPos) _PaintSelection(); // Fire off an event to let accessibility apps know the selection has changed. - auto pNotifier = ServiceLocator::LocateAccessibilityNotifier(); - if (pNotifier) + if (const auto pNotifier = ServiceLocator::LocateAccessibilityNotifier()) { pNotifier->NotifyConsoleCaretEvent(IAccessibilityNotifier::ConsoleCaretEventFlags::CaretSelection, PACKCOORD(coordBufferPos)); } - LOG_IF_FAILED(ServiceLocator::LocateConsoleWindow()->SignalUia(UIA_Text_TextSelectionChangedEventId)); + if (const auto window = ServiceLocator::LocateConsoleWindow()) + { + LOG_IF_FAILED(window->SignalUia(UIA_Text_TextSelectionChangedEventId)); + } } // Routine Description: @@ -366,7 +371,10 @@ void Selection::ClearSelection(const bool fStartingNewSelection) { _CancelMarkSelection(); } - LOG_IF_FAILED(ServiceLocator::LocateConsoleWindow()->SignalUia(UIA_Text_TextSelectionChangedEventId)); + if (const auto window = ServiceLocator::LocateConsoleWindow()) + { + LOG_IF_FAILED(window->SignalUia(UIA_Text_TextSelectionChangedEventId)); + } _dwSelectionFlags = 0; diff --git a/src/host/ut_host/ScreenBufferTests.cpp b/src/host/ut_host/ScreenBufferTests.cpp index b26801be0a1..1dd058f057d 100644 --- a/src/host/ut_host/ScreenBufferTests.cpp +++ b/src/host/ut_host/ScreenBufferTests.cpp @@ -3650,7 +3650,7 @@ void _FillLine(til::point position, T fillContent, TextAttribute fillAttr) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); auto& si = gci.GetActiveOutputBuffer().GetActiveBuffer(); - auto& row = si.GetTextBuffer().GetRowByOffset(position.y); + auto& row = si.GetTextBuffer().GetMutableRowByOffset(position.y); row.WriteCells({ fillContent, fillAttr }, position.x, false); } diff --git a/src/host/ut_host/SearchTests.cpp b/src/host/ut_host/SearchTests.cpp index faa3935e518..d786cd0ddd2 100644 --- a/src/host/ut_host/SearchTests.cpp +++ b/src/host/ut_host/SearchTests.cpp @@ -53,109 +53,95 @@ class SearchTests TEST_METHOD_CLEANUP(MethodCleanup) { m_state->CleanupNewTextBufferInfo(); - + Selection::Instance().ClearSelection(); return true; } - void DoFoundChecks(Search& s, til::point& coordStartExpected, til::CoordType lineDelta) + static void DoFoundChecks(Search& s, til::point coordStartExpected, til::CoordType lineDelta) { + const auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); + auto coordEndExpected = coordStartExpected; coordEndExpected.x += 1; - VERIFY_IS_TRUE(s.FindNext()); - VERIFY_ARE_EQUAL(coordStartExpected, s._coordSelStart); - VERIFY_ARE_EQUAL(coordEndExpected, s._coordSelEnd); + VERIFY_IS_TRUE(s.SelectNext()); + VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); + VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); coordStartExpected.y += lineDelta; coordEndExpected.y += lineDelta; - VERIFY_IS_TRUE(s.FindNext()); - VERIFY_ARE_EQUAL(coordStartExpected, s._coordSelStart); - VERIFY_ARE_EQUAL(coordEndExpected, s._coordSelEnd); + VERIFY_IS_TRUE(s.SelectNext()); + VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); + VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); coordStartExpected.y += lineDelta; coordEndExpected.y += lineDelta; - VERIFY_IS_TRUE(s.FindNext()); - VERIFY_ARE_EQUAL(coordStartExpected, s._coordSelStart); - VERIFY_ARE_EQUAL(coordEndExpected, s._coordSelEnd); + VERIFY_IS_TRUE(s.SelectNext()); + VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); + VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); coordStartExpected.y += lineDelta; coordEndExpected.y += lineDelta; - VERIFY_IS_TRUE(s.FindNext()); - VERIFY_ARE_EQUAL(coordStartExpected, s._coordSelStart); - VERIFY_ARE_EQUAL(coordEndExpected, s._coordSelEnd); - - VERIFY_IS_FALSE(s.FindNext()); + VERIFY_IS_TRUE(s.SelectNext()); + VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); + VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); } TEST_METHOD(ForwardCaseSensitive) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - til::point coordStartExpected; - Search s(gci.renderData, L"AB", Search::Direction::Forward, Search::Sensitivity::CaseSensitive); - DoFoundChecks(s, coordStartExpected, 1); + Search s(gci.renderData, L"AB", false, false); + DoFoundChecks(s, {}, 1); } TEST_METHOD(ForwardCaseSensitiveJapanese) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - - til::point coordStartExpected = { 2, 0 }; - Search s(gci.renderData, L"\x304b", Search::Direction::Forward, Search::Sensitivity::CaseSensitive); - DoFoundChecks(s, coordStartExpected, 1); + Search s(gci.renderData, L"\x304b", false, false); + DoFoundChecks(s, { 2, 0 }, 1); } TEST_METHOD(ForwardCaseInsensitive) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - til::point coordStartExpected; - Search s(gci.renderData, L"ab", Search::Direction::Forward, Search::Sensitivity::CaseInsensitive); - DoFoundChecks(s, coordStartExpected, 1); + Search s(gci.renderData, L"ab", false, true); + DoFoundChecks(s, {}, 1); } TEST_METHOD(ForwardCaseInsensitiveJapanese) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - - til::point coordStartExpected = { 2, 0 }; - Search s(gci.renderData, L"\x304b", Search::Direction::Forward, Search::Sensitivity::CaseInsensitive); - DoFoundChecks(s, coordStartExpected, 1); + Search s(gci.renderData, L"\x304b", false, true); + DoFoundChecks(s, { 2, 0 }, 1); } TEST_METHOD(BackwardCaseSensitive) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - - til::point coordStartExpected = { 0, 3 }; - Search s(gci.renderData, L"AB", Search::Direction::Backward, Search::Sensitivity::CaseSensitive); - DoFoundChecks(s, coordStartExpected, -1); + Search s(gci.renderData, L"AB", true, false); + DoFoundChecks(s, { 0, 3 }, -1); } TEST_METHOD(BackwardCaseSensitiveJapanese) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - - til::point coordStartExpected = { 2, 3 }; - Search s(gci.renderData, L"\x304b", Search::Direction::Backward, Search::Sensitivity::CaseSensitive); - DoFoundChecks(s, coordStartExpected, -1); + Search s(gci.renderData, L"\x304b", true, false); + DoFoundChecks(s, { 2, 3 }, -1); } TEST_METHOD(BackwardCaseInsensitive) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - - til::point coordStartExpected = { 0, 3 }; - Search s(gci.renderData, L"ab", Search::Direction::Backward, Search::Sensitivity::CaseInsensitive); - DoFoundChecks(s, coordStartExpected, -1); + Search s(gci.renderData, L"ab", true, true); + DoFoundChecks(s, { 0, 3 }, -1); } TEST_METHOD(BackwardCaseInsensitiveJapanese) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - - til::point coordStartExpected = { 2, 3 }; - Search s(gci.renderData, L"\x304b", Search::Direction::Backward, Search::Sensitivity::CaseInsensitive); - DoFoundChecks(s, coordStartExpected, -1); + Search s(gci.renderData, L"\x304b", true, true); + DoFoundChecks(s, { 2, 3 }, -1); } }; diff --git a/src/host/ut_host/TextBufferTests.cpp b/src/host/ut_host/TextBufferTests.cpp index bcb4cec4a99..44781d38a72 100644 --- a/src/host/ut_host/TextBufferTests.cpp +++ b/src/host/ut_host/TextBufferTests.cpp @@ -188,7 +188,7 @@ void TextBufferTests::TestWrapFlag() { auto& textBuffer = GetTbi(); - auto& Row = textBuffer.GetRowByOffset(0); + auto& Row = textBuffer.GetMutableRowByOffset(0); // no wrap by default VERIFY_IS_FALSE(Row.WasWrapForced()); @@ -278,7 +278,7 @@ void TextBufferTests::TestDoubleBytePadFlag() { auto& textBuffer = GetTbi(); - auto& Row = textBuffer.GetRowByOffset(0); + auto& Row = textBuffer.GetMutableRowByOffset(0); // no padding by default VERIFY_IS_FALSE(Row.WasDoubleBytePadded()); @@ -300,7 +300,7 @@ void TextBufferTests::DoBoundaryTest(PCWCHAR const pwszInputString, { auto& textBuffer = GetTbi(); - auto& row = textBuffer.GetRowByOffset(0); + auto& row = textBuffer.GetMutableRowByOffset(0); // copy string into buffer for (til::CoordType i = 0; i < cLength; ++i) @@ -571,7 +571,7 @@ void TextBufferTests::TestSetWrapOnCurrentRow() auto sCurrentRow = textBuffer.GetCursor().GetPosition().y; - auto& Row = textBuffer.GetRowByOffset(sCurrentRow); + auto& Row = textBuffer.GetMutableRowByOffset(sCurrentRow); Log::Comment(L"Testing off to on"); @@ -622,7 +622,7 @@ void TextBufferTests::TestIncrementCircularBuffer() textBuffer._firstRow = iRowToTestIndex; // fill first row with some stuff - auto& FirstRow = textBuffer.GetRowByOffset(0); + auto& FirstRow = textBuffer.GetMutableRowByOffset(0); FirstRow.ReplaceCharacters(0, 1, { L"A" }); // ensure it does say that it contains text @@ -1847,7 +1847,7 @@ void TextBufferTests::ResizeTraditionalRotationPreservesHighUnicode() // This is the negative squared latin capital letter B emoji: 🅱 // It's encoded in UTF-16, as needed by the buffer. const auto bButton = L"\xD83C\xDD71"; - _buffer->GetRowByOffset(pos.y).ReplaceCharacters(pos.x, 2, bButton); + _buffer->GetMutableRowByOffset(pos.y).ReplaceCharacters(pos.x, 2, bButton); // Read back the text at that position and ensure that it matches what we wrote. const auto readBack = _buffer->GetTextDataAt(pos); @@ -1888,7 +1888,7 @@ void TextBufferTests::ScrollBufferRotationPreservesHighUnicode() // This is the fire emoji: 🔥 // It's encoded in UTF-16, as needed by the buffer. const auto fire = L"\xD83D\xDD25"; - _buffer->GetRowByOffset(pos.y).ReplaceCharacters(pos.x, 2, fire); + _buffer->GetMutableRowByOffset(pos.y).ReplaceCharacters(pos.x, 2, fire); // Read back the text at that position and ensure that it matches what we wrote. const auto readBack = _buffer->GetTextDataAt(pos); @@ -1923,7 +1923,7 @@ void TextBufferTests::ResizeTraditionalHighUnicodeRowRemoval() // This is the eggplant emoji: 🍆 // It's encoded in UTF-16, as needed by the buffer. const auto emoji = L"\xD83C\xDF46"; - _buffer->GetRowByOffset(pos.y).ReplaceCharacters(pos.x, 2, emoji); + _buffer->GetMutableRowByOffset(pos.y).ReplaceCharacters(pos.x, 2, emoji); // Read back the text at that position and ensure that it matches what we wrote. const auto readBack = _buffer->GetTextDataAt(pos); @@ -1953,7 +1953,7 @@ void TextBufferTests::ResizeTraditionalHighUnicodeColumnRemoval() // This is the peach emoji: 🍑 // It's encoded in UTF-16, as needed by the buffer. const auto emoji = L"\xD83C\xDF51"; - _buffer->GetRowByOffset(pos.y).ReplaceCharacters(pos.x, 2, emoji); + _buffer->GetMutableRowByOffset(pos.y).ReplaceCharacters(pos.x, 2, emoji); // Read back the text at that position and ensure that it matches what we wrote. const auto readBack = _buffer->GetTextDataAt(pos); @@ -1993,7 +1993,7 @@ void TextBufferTests::TestOverwriteChars() UINT cursorSize = 12; TextAttribute attr{ 0x7f }; TextBuffer buffer{ bufferSize, attr, cursorSize, false, _renderer }; - auto& row = buffer.GetRowByOffset(0); + auto& row = buffer.GetMutableRowByOffset(0); // scientist emoji U+1F9D1 U+200D U+1F52C #define complex1 L"\U0001F9D1\U0000200D\U0001F52C" @@ -2009,17 +2009,17 @@ void TextBufferTests::TestOverwriteChars() // Test overwriting wide chars with wide chars slightly shifted left/right. row.ReplaceCharacters(1, 2, complex1); row.ReplaceCharacters(7, 2, complex1); - VERIFY_ARE_EQUAL(L" " complex1 L" " complex1 L" ", row.GetText()); + VERIFY_ARE_EQUAL(L" " complex1 L" " complex1, row.GetText()); // Test overwriting wide chars with wide chars. row.ReplaceCharacters(1, 2, complex2); row.ReplaceCharacters(7, 2, complex2); - VERIFY_ARE_EQUAL(L" " complex2 L" " complex2 L" ", row.GetText()); + VERIFY_ARE_EQUAL(L" " complex2 L" " complex2, row.GetText()); // Test overwriting wide chars with narrow chars. row.ReplaceCharacters(1, 1, simple); row.ReplaceCharacters(8, 1, simple); - VERIFY_ARE_EQUAL(L" " simple L" " simple L" ", row.GetText()); + VERIFY_ARE_EQUAL(L" " simple L" " simple, row.GetText()); // Test clearing narrow/wide chars. row.ReplaceCharacters(0, 1, simple); @@ -2049,7 +2049,7 @@ void TextBufferTests::TestRowReplaceText() static constexpr UINT cursorSize = 12; const TextAttribute attr{ 0x7f }; TextBuffer buffer{ bufferSize, attr, cursorSize, false, _renderer }; - auto& row = buffer.GetRowByOffset(0); + auto& row = buffer.GetMutableRowByOffset(0); #define complex L"\U0001F41B" @@ -2755,14 +2755,14 @@ void TextBufferTests::HyperlinkTrim() const auto id = _buffer->GetHyperlinkId(url, customId); TextAttribute newAttr{ 0x7f }; newAttr.SetHyperlinkId(id); - _buffer->GetRowByOffset(pos.y).SetAttrToEnd(pos.x, newAttr); + _buffer->GetMutableRowByOffset(pos.y).SetAttrToEnd(pos.x, newAttr); _buffer->AddHyperlinkToMap(url, id); // Set a different hyperlink id somewhere else in the buffer const til::point otherPos{ 70, 5 }; const auto otherId = _buffer->GetHyperlinkId(otherUrl, otherCustomId); newAttr.SetHyperlinkId(otherId); - _buffer->GetRowByOffset(otherPos.y).SetAttrToEnd(otherPos.x, newAttr); + _buffer->GetMutableRowByOffset(otherPos.y).SetAttrToEnd(otherPos.x, newAttr); _buffer->AddHyperlinkToMap(otherUrl, otherId); // Increment the circular buffer @@ -2799,12 +2799,12 @@ void TextBufferTests::NoHyperlinkTrim() const auto id = _buffer->GetHyperlinkId(url, customId); TextAttribute newAttr{ 0x7f }; newAttr.SetHyperlinkId(id); - _buffer->GetRowByOffset(pos.y).SetAttrToEnd(pos.x, newAttr); + _buffer->GetMutableRowByOffset(pos.y).SetAttrToEnd(pos.x, newAttr); _buffer->AddHyperlinkToMap(url, id); // Set the same hyperlink id somewhere else in the buffer const til::point otherPos{ 70, 5 }; - _buffer->GetRowByOffset(otherPos.y).SetAttrToEnd(otherPos.x, newAttr); + _buffer->GetMutableRowByOffset(otherPos.y).SetAttrToEnd(otherPos.x, newAttr); // Increment the circular buffer _buffer->IncrementCircularBuffer(); diff --git a/src/inc/test/CommonState.hpp b/src/inc/test/CommonState.hpp index 9a0e1272b7d..1803acb0ae8 100644 --- a/src/inc/test/CommonState.hpp +++ b/src/inc/test/CommonState.hpp @@ -237,7 +237,7 @@ class CommonState for (til::CoordType iRow = 0; iRow < cRowsToFill; iRow++) { - ROW& row = textBuffer.GetRowByOffset(iRow); + ROW& row = textBuffer.GetMutableRowByOffset(iRow); FillRow(&row, iRow & 1); } diff --git a/src/interactivity/win32/ut_interactivity_win32/UiaTextRangeTests.cpp b/src/interactivity/win32/ut_interactivity_win32/UiaTextRangeTests.cpp index 8278bfa952a..34b4e20caef 100644 --- a/src/interactivity/win32/ut_interactivity_win32/UiaTextRangeTests.cpp +++ b/src/interactivity/win32/ut_interactivity_win32/UiaTextRangeTests.cpp @@ -361,7 +361,7 @@ class UiaTextRangeTests for (auto i = 0; i < _pTextBuffer->TotalRowCount() / 2; ++i) { const std::wstring_view glyph{ i % 2 == 0 ? L" " : L"X" }; - auto& row = _pTextBuffer->GetRowByOffset(i); + auto& row = _pTextBuffer->GetMutableRowByOffset(i); const auto width = row.size(); for (uint16_t x = 0; x < width; ++x) @@ -489,7 +489,7 @@ class UiaTextRangeTests // Let's start by filling the text buffer with something useful: for (auto i = 0; i < _pTextBuffer->TotalRowCount(); ++i) { - auto& row = _pTextBuffer->GetRowByOffset(i); + auto& row = _pTextBuffer->GetMutableRowByOffset(i); const auto width = row.size(); for (uint16_t x = 0; x < width; ++x) From ecc735d0077be911e6f3f49ad2f27ad7889c864f Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 21 Aug 2023 19:21:50 +0200 Subject: [PATCH 07/22] Implement backwards text iteration --- src/buffer/out/UTextAdapter.cpp | 104 ++++++++++++++++---------------- src/buffer/out/textBuffer.cpp | 4 +- 2 files changed, 56 insertions(+), 52 deletions(-) diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index a929c1e7594..6ade9b4b5c3 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -111,12 +111,15 @@ catch (...) static UBool U_CALLCONV utextAccess(UText* ut, int64_t nativeIndex, UBool forward) noexcept try { + if (nativeIndex < 0) + { + nativeIndex = 0; + } + + auto neededIndex = nativeIndex; if (!forward) { - // Even after reading the ICU documentation I'm a little unclear on how to handle the forward flag. - // I _think_ it's basically nothing but "nativeIndex--" for us, but I didn't want to verify it - // because right now we never use any ICU functions that require backwards text access anyways. - return false; + neededIndex--; } const auto& textBuffer = *static_cast(ut->context); @@ -126,60 +129,58 @@ try auto y = accessCurrentRow(ut); std::wstring_view text; - if (nativeIndex >= start && nativeIndex < limit) - { - return true; - } - - if (nativeIndex < start) + if (neededIndex < start || neededIndex >= limit) { - for (;;) + if (neededIndex < start) { - --y; - if (y < range.begin) + do { - return false; - } - - text = textBuffer.GetRowByOffset(y).GetText(); - limit = start; - start -= text.size(); - - if (nativeIndex >= start) - { - break; - } + --y; + if (y < range.begin) + { + return false; + } + + text = textBuffer.GetRowByOffset(y).GetText(); + limit = start; + start -= text.size(); + } while (neededIndex < start); } - } - else - { - for (;;) + else { - ++y; - if (y >= range.end) + do { - return false; - } + ++y; + if (y >= range.end) + { + return false; + } + + text = textBuffer.GetRowByOffset(y).GetText(); + start = limit; + limit += text.size(); + } while (neededIndex >= limit); + } + + accessCurrentRow(ut) = y; + ut->chunkNativeStart = start; + ut->chunkNativeLimit = limit; + ut->chunkLength = gsl::narrow_cast(text.size()); +#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). + ut->chunkContents = reinterpret_cast(text.data()); + ut->nativeIndexingLimit = ut->chunkLength; + } - text = textBuffer.GetRowByOffset(y).GetText(); - start = limit; - limit += text.size(); + auto offset = gsl::narrow_cast(nativeIndex - start); - if (nativeIndex < limit) - { - break; - } - } + // Don't leave the offset on a trailing surrogate pair. See U16_SET_CP_START. + // This assumes that the TextBuffer contains valid UTF-16 which may theoretically not be the case. + if (offset > 0 && offset < ut->chunkLength && U16_IS_TRAIL(til::at(ut->chunkContents, offset))) + { + offset--; } - accessCurrentRow(ut) = y; - ut->chunkNativeStart = start; - ut->chunkNativeLimit = limit; - ut->chunkOffset = gsl::narrow_cast(nativeIndex - start); - ut->chunkLength = gsl::narrow_cast(text.size()); -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). - ut->chunkContents = reinterpret_cast(text.data()); - ut->nativeIndexingLimit = ut->chunkLength; + ut->chunkOffset = offset; return true; } catch (...) @@ -281,7 +282,8 @@ UText* UTextFromTextBuffer(UText* ut, const TextBuffer& textBuffer, til::CoordTy return ut; } -// Returns an inclusive point range given a text start and end position. This function is designed to be used with uregex_start64/uregex_end64. +// Returns an inclusive point range given a text start and end position. +// This function is designed to be used with uregex_start64/uregex_end64. til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t nativeIndexEnd) { // The parameters are given as a half-open [beg,end) range, but the point_span we return in closed [beg,end]. @@ -293,7 +295,7 @@ til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t if (utextAccess(ut, nativeIndexBeg, true)) { const auto y = accessCurrentRow(ut); - ret.start.x = textBuffer.GetRowByOffset(y).GetLeadingColumnAtCharOffset(gsl::narrow_cast(nativeIndexBeg - ut->chunkNativeStart)); + ret.start.x = textBuffer.GetRowByOffset(y).GetLeadingColumnAtCharOffset(ut->chunkOffset); ret.start.y = y; } else @@ -304,7 +306,7 @@ til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t if (utextAccess(ut, nativeIndexEnd, true)) { const auto y = accessCurrentRow(ut); - ret.end.x = textBuffer.GetRowByOffset(y).GetTrailingColumnAtCharOffset(gsl::narrow_cast(nativeIndexEnd - ut->chunkNativeStart)); + ret.end.x = textBuffer.GetRowByOffset(y).GetTrailingColumnAtCharOffset(ut->chunkOffset); ret.end.y = y; } else diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index 5219a1a2f51..27d0c7b2b3a 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -2792,7 +2792,9 @@ PointTree TextBuffer::GetPatterns(const til::CoordType firstRow, const til::Coor { const auto nativeIndexBeg = uregex_start64(_urlRegex, 0, &status); const auto nativeIndexEnd = uregex_end64(_urlRegex, 0, &status); - const auto range = BufferRangeFromUText(&text, nativeIndexBeg, nativeIndexEnd); + auto range = BufferRangeFromUText(&text, nativeIndexBeg, nativeIndexEnd); + // PointTree uses half-open ranges. + range.end.x++; intervals.push_back(PointTree::interval(range.start, range.end, 0)); } while (uregex_findNext(_urlRegex, &status)); } From 56c79112e5b001641e4934d994e4effb26e9d919 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 21 Aug 2023 19:26:27 +0200 Subject: [PATCH 08/22] Address feedback --- src/buffer/out/Row.cpp | 2 +- src/cascadia/TerminalControl/ControlCore.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 467b7de8d27..f25ef795e6b 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -10,7 +10,7 @@ #include "textBuffer.hpp" #include "../../types/inc/GlyphWidth.hpp" -// It would be nice to add checked array access in the future, but it's a little annoying to do so without imparting +// It would be nice to add checked array access in the future, but it's a little annoying to do so without impacting // performance (including Debug performance). Other languages are a little bit more ergonomic there than C++. #pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).) #pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). diff --git a/src/cascadia/TerminalControl/ControlCore.cpp b/src/cascadia/TerminalControl/ControlCore.cpp index c1d67d95d9e..3e0a9fe7e9c 100644 --- a/src/cascadia/TerminalControl/ControlCore.cpp +++ b/src/cascadia/TerminalControl/ControlCore.cpp @@ -1542,7 +1542,6 @@ namespace winrt::Microsoft::Terminal::Control::implementation void ControlCore::Search(const winrt::hstring& text, const bool goForward, const bool caseSensitive) { auto lock = _terminal->LockForWriting(); - _terminal->SetBlockSelection(false); if (_searcher.IsStale() || _searcherText != text || _searcherGoForward != goForward || _searcherCaseSensitive != caseSensitive) { @@ -1558,6 +1557,7 @@ namespace winrt::Microsoft::Terminal::Control::implementation // this is used for search, // DO NOT call _updateSelectionUI() here. // We don't want to show the markers so manually tell it to clear it. + _terminal->SetBlockSelection(false); _renderer->TriggerSelection(); _UpdateSelectionMarkersHandlers(*this, winrt::make(true)); } From c731eae08c97633dd3dd7aa7c4a99d85dbe351f0 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 22 Aug 2023 18:55:44 +0200 Subject: [PATCH 09/22] Address feedback --- src/buffer/out/Row.cpp | 2 +- src/buffer/out/UTextAdapter.cpp | 38 ++++--- src/buffer/out/UTextAdapter.h | 8 +- src/buffer/out/sources.inc | 3 +- src/buffer/out/textBuffer.cpp | 63 +---------- src/buffer/out/textBuffer.hpp | 1 - src/cascadia/TerminalCore/Terminal.cpp | 101 +++++++++++++++++- src/cascadia/TerminalCore/Terminal.hpp | 1 + .../TerminalCore/TerminalSelection.cpp | 2 +- 9 files changed, 134 insertions(+), 85 deletions(-) diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index f25ef795e6b..649825267bf 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -94,7 +94,7 @@ CharToColumnMapper::CharToColumnMapper(const wchar_t* chars, const uint16_t* cha // This function in particular returns the glyph's first column. til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t offset) noexcept { - offset = offset < 0 ? 0 : (offset > _lastCharOffset ? _lastCharOffset : offset); + offset = clamp(offset, 0, _lastCharOffset); auto col = _currentColumn; const auto currentOffset = _charOffsets[col]; diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index 6ade9b4b5c3..dc5549d7327 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -262,30 +262,40 @@ static constexpr UTextFuncs utextFuncs{ }; // Creates a UText from the given TextBuffer that spans rows [rowBeg,RowEnd). -UText* UTextFromTextBuffer(UText* ut, const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept +UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept { __assume(status != nullptr); - ut = utext_setup(ut, 0, status); - if (*status > U_ZERO_ERROR) - { - return nullptr; - } + UText ut = UTEXT_INITIALIZER; + ut.providerProperties = (1 << UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE) | (1 << UTEXT_PROVIDER_STABLE_CHUNKS); + ut.pFuncs = &utextFuncs; + ut.context = &textBuffer; + accessCurrentRow(&ut) = rowBeg - 1; // the utextAccess() below will advance this by 1. + accessRowRange(&ut) = { rowBeg, rowEnd }; - ut->providerProperties = (1 << UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE) | (1 << UTEXT_PROVIDER_STABLE_CHUNKS); - ut->pFuncs = &utextFuncs; - ut->context = &textBuffer; - accessCurrentRow(ut) = rowBeg - 1; // the utextAccess() below will advance this by 1. - accessRowRange(ut) = { rowBeg, rowEnd }; - - utextAccess(ut, 0, true); + utextAccess(&ut, 0, true); return ut; } +URegularExpression* CreateURegularExpression(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept +{ +#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). + const auto re = uregex_open(reinterpret_cast(pattern.data()), gsl::narrow_cast(pattern.size()), flags, nullptr, status); + // ICU describes the time unit as being dependent on CPU performance and "typically [in] the order of milliseconds", + // but this claim seems highly outdated already. On my CPU from 2021, a limit of 4096 equals roughly 600ms. + uregex_setTimeLimit(re, 4096, status); + uregex_setStackLimit(re, 4 * 1024 * 1024, status); + return re; +} + // Returns an inclusive point range given a text start and end position. // This function is designed to be used with uregex_start64/uregex_end64. -til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t nativeIndexEnd) +til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re) { + UErrorCode status = U_ZERO_ERROR; + const auto nativeIndexBeg = uregex_start64(re, 0, &status); + auto nativeIndexEnd = uregex_end64(re, 0, &status); + // The parameters are given as a half-open [beg,end) range, but the point_span we return in closed [beg,end]. nativeIndexEnd--; diff --git a/src/buffer/out/UTextAdapter.h b/src/buffer/out/UTextAdapter.h index 06d413902a4..ff523e26032 100644 --- a/src/buffer/out/UTextAdapter.h +++ b/src/buffer/out/UTextAdapter.h @@ -7,7 +7,9 @@ #include class TextBuffer; -struct UText; -UText* UTextFromTextBuffer(UText* ut, const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept; -til::point_span BufferRangeFromUText(UText* ut, int64_t nativeIndexBeg, int64_t nativeIndexEnd); +using unique_URegularExpression = wistd::unique_ptr>; + +UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept; +URegularExpression* CreateURegularExpression(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept; +til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re); diff --git a/src/buffer/out/sources.inc b/src/buffer/out/sources.inc index af3d278749c..6611cc0aa5e 100644 --- a/src/buffer/out/sources.inc +++ b/src/buffer/out/sources.inc @@ -40,7 +40,8 @@ SOURCES= \ ..\textBuffer.cpp \ ..\textBufferCellIterator.cpp \ ..\textBufferTextIterator.cpp \ - ..\search.cpp \ + ..\search.cpp \ + ..\UTextAdapter.cpp \ INCLUDES= \ $(INCLUDES); \ diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index 27d0c7b2b3a..d7ce7b60575 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -18,7 +18,6 @@ using namespace Microsoft::Console; using namespace Microsoft::Console::Types; using PointTree = interval_tree::IntervalTree; -using unique_URegularExpression = wistd::unique_ptr>; constexpr bool allWhitespace(const std::wstring_view& text) noexcept { @@ -32,17 +31,6 @@ constexpr bool allWhitespace(const std::wstring_view& text) noexcept return true; } -static URegularExpression* createURegularExpression(const std::wstring_view& pattern, uint32_t flags, UErrorCode* error) noexcept -{ -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). - const auto re = uregex_open(reinterpret_cast(pattern.data()), gsl::narrow_cast(pattern.size()), flags, nullptr, error); - // ICU describes the time unit as being dependent on CPU performance and "typically [in] the order of milliseconds", - // but this claim seems highly outdated already. On my CPU from 2021, a limit of 4096 equals roughly 600ms. - uregex_setTimeLimit(re, 4096, error); - uregex_setStackLimit(re, 4 * 1024 * 1024, error); - return re; -} - static std::atomic s_mutationCountInitialValue; // Routine Description: @@ -2761,47 +2749,6 @@ void TextBuffer::CopyHyperlinkMaps(const TextBuffer& other) _currentHyperlinkId = other._currentHyperlinkId; } -// Method Description: -// - Finds patterns within the requested region of the text buffer -// Arguments: -// - The firstRow to start searching from -// - The lastRow to search -// Return value: -// - An interval tree containing the patterns found -PointTree TextBuffer::GetPatterns(const til::CoordType firstRow, const til::CoordType lastRow) -{ - PointTree::interval_vector intervals; - - UErrorCode status = U_ZERO_ERROR; -#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47). - UText text = UTEXT_INITIALIZER; - UTextFromTextBuffer(&text, *this, firstRow, lastRow + 1, &status); - - if (!_urlRegex) - { - static constexpr std::wstring_view linkPattern{ LR"(\b(?:https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|$!:,.;]*[A-Za-z0-9+&@#/%=~_|$])" }; - _urlRegex = createURegularExpression(linkPattern, 0, &status); - assert(_urlRegex); - } - - uregex_setUText(_urlRegex, &text, &status); - - if (uregex_find(_urlRegex, -1, &status)) - { - do - { - const auto nativeIndexBeg = uregex_start64(_urlRegex, 0, &status); - const auto nativeIndexEnd = uregex_end64(_urlRegex, 0, &status); - auto range = BufferRangeFromUText(&text, nativeIndexBeg, nativeIndexEnd); - // PointTree uses half-open ranges. - range.end.x++; - intervals.push_back(PointTree::interval(range.start, range.end, 0)); - } while (uregex_findNext(_urlRegex, &status)); - } - - return PointTree{ std::move(intervals) }; -} - // Searches through the entire (committed) text buffer for `needle` and returns the coordinates in absolute coordinates. // The end coordinates of the returned ranges are considered inclusive. std::vector TextBuffer::SearchText(const std::wstring_view& needle, bool caseInsensitive) const @@ -2827,20 +2774,16 @@ std::vector TextBuffer::SearchText(const std::wstring_view& nee WI_SetFlagIf(flags, UREGEX_CASE_INSENSITIVE, caseInsensitive); UErrorCode status = U_ZERO_ERROR; -#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47). - UText text = UTEXT_INITIALIZER; - UTextFromTextBuffer(&text, *this, rowBeg, rowEnd, &status); + auto text = UTextFromTextBuffer(*this, rowBeg, rowEnd, &status); - const unique_URegularExpression re{ createURegularExpression(needle, flags, &status) }; + const unique_URegularExpression re{ CreateURegularExpression(needle, flags, &status) }; uregex_setUText(re.get(), &text, &status); if (uregex_find(re.get(), -1, &status)) { do { - const auto nativeIndexBeg = uregex_start64(re.get(), 0, &status); - const auto nativeIndexEnd = uregex_end64(re.get(), 0, &status); - results.emplace_back(BufferRangeFromUText(&text, nativeIndexBeg, nativeIndexEnd)); + results.emplace_back(BufferRangeFromMatch(&text, re.get())); } while (uregex_findNext(re.get(), &status)); } diff --git a/src/buffer/out/textBuffer.hpp b/src/buffer/out/textBuffer.hpp index 23e7b77d254..90b64591701 100644 --- a/src/buffer/out/textBuffer.hpp +++ b/src/buffer/out/textBuffer.hpp @@ -265,7 +265,6 @@ class TextBuffer final const std::optional lastCharacterViewport, std::optional> positionInfo); - interval_tree::IntervalTree GetPatterns(const til::CoordType firstRow, const til::CoordType lastRow); std::vector SearchText(const std::wstring_view& needle, bool caseInsensitive) const; std::vector SearchText(const std::wstring_view& needle, bool caseInsensitive, til::CoordType rowBeg, til::CoordType rowEnd) const; diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index ada1bed5b62..c0f339f90dc 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -9,7 +9,9 @@ #include "../../types/inc/utils.hpp" #include "../../types/inc/colorTable.hpp" #include "../../buffer/out/search.h" +#include "../../buffer/out/UTextAdapter.h" +#include #include using namespace winrt::Microsoft::Terminal::Core; @@ -568,7 +570,7 @@ std::wstring Terminal::GetHyperlinkAtBufferPosition(const til::point bufferPos) { // Hyperlink is outside of the current view. // We need to find if there's a pattern at that location. - const auto patterns = _activeBuffer().GetPatterns(bufferPos.y, bufferPos.y); + const auto patterns = _getPatterns(bufferPos.y, bufferPos.y); // NOTE: patterns is stored with top y-position being 0, // so we need to cleverly set the y-pos to 0. @@ -1207,9 +1209,8 @@ bool Terminal::IsCursorBlinkingAllowed() const noexcept // - INVARIANT: this function can only be called if the caller has the writing lock on the terminal void Terminal::UpdatePatternsUnderLock() { - auto oldTree = _patternIntervalTree; - _patternIntervalTree = _activeBuffer().GetPatterns(_VisibleStartIndex(), _VisibleEndIndex()); - _InvalidatePatternTree(oldTree); + _InvalidatePatternTree(_patternIntervalTree); + _patternIntervalTree = _getPatterns(_VisibleStartIndex(), _VisibleEndIndex()); _InvalidatePatternTree(_patternIntervalTree); } @@ -1344,6 +1345,98 @@ void Terminal::_updateUrlDetection() } } +// Interns URegularExpression instances so that they can be reused. This method is thread-safe. +static unique_URegularExpression internURegularExpression(const std::wstring_view& pattern) +{ + struct CacheValue + { + unique_URegularExpression re; + size_t generation = 0; + }; + + struct CacheKeyHasher + { + using is_transparent = void; + + std::size_t operator()(const std::wstring_view& str) const + { + return til::hash(str); + } + }; + + struct SharedState + { + wil::srwlock lock; + std::unordered_map> set; + size_t totalInsertions = 0; + }; + + static SharedState shared; + static constexpr size_t cacheSizeLimit = 128; + + UErrorCode status = U_ZERO_ERROR; + + { + const auto guard = shared.lock.lock_shared(); + if (const auto it = shared.set.find(pattern); it != shared.set.end()) + { + return unique_URegularExpression{ uregex_clone(it->second.re.get(), &status) }; + } + } + + // Even if the URegularExpression creation failed, we'll insert it into the cache, because there's no point in retrying. + // (Apart from OOM but in that case this application will crash anyways in 3.. 2.. 1..) + unique_URegularExpression re{ CreateURegularExpression(pattern, 0, &status) }; + unique_URegularExpression clone{ uregex_clone(re.get(), &status) }; + std::wstring key{ pattern }; + + const auto guard = shared.lock.lock_exclusive(); + + shared.set.insert_or_assign(std::move(key), CacheValue{ std::move(re), shared.totalInsertions }); + shared.totalInsertions++; + + // If the cache is full remove the oldest element (oldest = lowest generation, just like with humans). + if (shared.set.size() > cacheSizeLimit) + { + shared.set.erase(std::min_element(shared.set.begin(), shared.set.end(), [](const auto& it, const auto& smallest) { + return it.second.generation < smallest.second.generation; + })); + } + + return clone; +} + +PointTree Terminal::_getPatterns(til::CoordType beg, til::CoordType end) const +{ + static constexpr std::array patterns{ + LR"(\b(?:https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|$!:,.;]*[A-Za-z0-9+&@#/%=~_|$])", + }; + + PointTree::interval_vector intervals; + + UErrorCode status = U_ZERO_ERROR; + auto text = UTextFromTextBuffer(_activeBuffer(), beg, end + 1, &status); + + for (size_t i = 0; i < patterns.size(); ++i) + { + const auto re = internURegularExpression(patterns[i]); + uregex_setUText(re.get(), &text, &status); + + if (uregex_find(re.get(), -1, &status)) + { + do + { + auto range = BufferRangeFromMatch(&text, re.get()); + // PointTree uses half-open ranges. + range.end.x++; + intervals.push_back(PointTree::interval(range.start, range.end, 0)); + } while (uregex_findNext(re.get(), &status)); + } + } + + return PointTree{ std::move(intervals) }; +} + // NOTE: This is the version of AddMark that comes from the UI. The VT api call into this too. void Terminal::AddMark(const ScrollMark& mark, const til::point& start, diff --git a/src/cascadia/TerminalCore/Terminal.hpp b/src/cascadia/TerminalCore/Terminal.hpp index a0110cfbfb9..8a76201822c 100644 --- a/src/cascadia/TerminalCore/Terminal.hpp +++ b/src/cascadia/TerminalCore/Terminal.hpp @@ -448,6 +448,7 @@ class Microsoft::Terminal::Core::Terminal final : bool _inAltBuffer() const noexcept; TextBuffer& _activeBuffer() const noexcept; void _updateUrlDetection(); + interval_tree::IntervalTree _getPatterns(til::CoordType beg, til::CoordType end) const; #pragma region TextSelection // These methods are defined in TerminalSelection.cpp diff --git a/src/cascadia/TerminalCore/TerminalSelection.cpp b/src/cascadia/TerminalCore/TerminalSelection.cpp index 018bf0623c6..110e2ebbaea 100644 --- a/src/cascadia/TerminalCore/TerminalSelection.cpp +++ b/src/cascadia/TerminalCore/TerminalSelection.cpp @@ -495,7 +495,7 @@ void Terminal::SelectHyperlink(const SearchDirection dir) const til::point bufferEnd{ bufferSize.RightInclusive(), ViewEndIndex() }; while (!result && bufferSize.IsInBounds(searchStart) && bufferSize.IsInBounds(searchEnd) && searchStart <= searchEnd && bufferStart <= searchStart && searchEnd <= bufferEnd) { - auto patterns = _activeBuffer().GetPatterns(searchStart.y, searchEnd.y); + auto patterns = _getPatterns(searchStart.y, searchEnd.y); resultList = patterns.findContained(convertToSearchArea(searchStart), convertToSearchArea(searchEnd)); result = extractResultFromList(resultList); if (!result) From ca4ec38485ce50a43b41fb2ff3ff2b1c4a72f188 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 22 Aug 2023 22:49:56 +0200 Subject: [PATCH 10/22] Address feedback, Use namespaces --- src/buffer/out/UTextAdapter.cpp | 12 +++++++----- src/buffer/out/UTextAdapter.h | 12 +++++++----- src/buffer/out/textBuffer.cpp | 10 +++------- src/buffer/out/textBuffer.hpp | 4 ---- src/cascadia/TerminalCore/Terminal.cpp | 14 +++++++------- 5 files changed, 24 insertions(+), 28 deletions(-) diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index dc5549d7327..986a0de446c 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -202,6 +202,9 @@ catch (...) // @param destCapacity The size, in UChars, of the destination buffer. May be zero for precomputing the required size. // @param status receives any error status. If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for preflighting. // @return Number of UChars in the data. Does not include a trailing NUL. +// +// NOTE: utextExtract's correctness hasn't been verified yet. The code remains, just incase its functionality is needed in the future. +#pragma warning(suppress : 4505) // 'utextExtract': unreferenced function with internal linkage has been removed static int32_t U_CALLCONV utextExtract(UText* ut, int64_t nativeStart, int64_t nativeLimit, char16_t* dest, int32_t destCapacity, UErrorCode* status) noexcept try { @@ -258,11 +261,10 @@ static constexpr UTextFuncs utextFuncs{ .clone = utextClone, .nativeLength = utextNativeLength, .access = utextAccess, - .extract = utextExtract, }; // Creates a UText from the given TextBuffer that spans rows [rowBeg,RowEnd). -UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept +UText Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept { __assume(status != nullptr); @@ -277,7 +279,7 @@ UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, t return ut; } -URegularExpression* CreateURegularExpression(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept +Microsoft::Console::ICU::unique_uregex Microsoft::Console::ICU::CreateRegex(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept { #pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). const auto re = uregex_open(reinterpret_cast(pattern.data()), gsl::narrow_cast(pattern.size()), flags, nullptr, status); @@ -285,12 +287,12 @@ URegularExpression* CreateURegularExpression(const std::wstring_view& pattern, u // but this claim seems highly outdated already. On my CPU from 2021, a limit of 4096 equals roughly 600ms. uregex_setTimeLimit(re, 4096, status); uregex_setStackLimit(re, 4 * 1024 * 1024, status); - return re; + return unique_uregex{ re }; } // Returns an inclusive point range given a text start and end position. // This function is designed to be used with uregex_start64/uregex_end64. -til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re) +til::point_span Microsoft::Console::ICU::BufferRangeFromMatch(UText* ut, URegularExpression* re) { UErrorCode status = U_ZERO_ERROR; const auto nativeIndexBeg = uregex_start64(re, 0, &status); diff --git a/src/buffer/out/UTextAdapter.h b/src/buffer/out/UTextAdapter.h index ff523e26032..158fd25ca2d 100644 --- a/src/buffer/out/UTextAdapter.h +++ b/src/buffer/out/UTextAdapter.h @@ -3,13 +3,15 @@ #pragma once -// Can't forward declare the UErrorCode enum. Thanks C++. #include class TextBuffer; -using unique_URegularExpression = wistd::unique_ptr>; +namespace Microsoft::Console::ICU +{ + using unique_uregex = wistd::unique_ptr>; -UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept; -URegularExpression* CreateURegularExpression(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept; -til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re); + UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept; + unique_uregex CreateRegex(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept; + til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re); +} diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index d7ce7b60575..24762954632 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -69,10 +69,6 @@ TextBuffer::~TextBuffer() { _destroy(); } - if (_urlRegex) - { - uregex_close(_urlRegex); - } } // I put these functions in a block at the start of the class, because they're the most @@ -2774,16 +2770,16 @@ std::vector TextBuffer::SearchText(const std::wstring_view& nee WI_SetFlagIf(flags, UREGEX_CASE_INSENSITIVE, caseInsensitive); UErrorCode status = U_ZERO_ERROR; - auto text = UTextFromTextBuffer(*this, rowBeg, rowEnd, &status); + auto text = ICU::UTextFromTextBuffer(*this, rowBeg, rowEnd, &status); - const unique_URegularExpression re{ CreateURegularExpression(needle, flags, &status) }; + const auto re = ICU::CreateRegex(needle, flags, &status); uregex_setUText(re.get(), &text, &status); if (uregex_find(re.get(), -1, &status)) { do { - results.emplace_back(BufferRangeFromMatch(&text, re.get())); + results.emplace_back(ICU::BufferRangeFromMatch(&text, re.get())); } while (uregex_findNext(re.get(), &status)); } diff --git a/src/buffer/out/textBuffer.hpp b/src/buffer/out/textBuffer.hpp index 90b64591701..19744b1bed4 100644 --- a/src/buffer/out/textBuffer.hpp +++ b/src/buffer/out/textBuffer.hpp @@ -375,10 +375,6 @@ class TextBuffer final Cursor _cursor; std::vector _marks; - // While safe RAII wrappers for URegularExpression* would improve maintainability, it would make - // it more difficult to avoid having to forward declare (or outright expose) more ICU stuff. - // This may make switching to other Unicode libraries easier in the distant future. - URegularExpression* _urlRegex = nullptr; bool _isActiveBuffer = false; #ifdef UNIT_TESTING diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index c0f339f90dc..9259ff069e0 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -1346,11 +1346,11 @@ void Terminal::_updateUrlDetection() } // Interns URegularExpression instances so that they can be reused. This method is thread-safe. -static unique_URegularExpression internURegularExpression(const std::wstring_view& pattern) +static ICU::unique_uregex internURegularExpression(const std::wstring_view& pattern) { struct CacheValue { - unique_URegularExpression re; + ICU::unique_uregex re; size_t generation = 0; }; @@ -1380,14 +1380,14 @@ static unique_URegularExpression internURegularExpression(const std::wstring_vie const auto guard = shared.lock.lock_shared(); if (const auto it = shared.set.find(pattern); it != shared.set.end()) { - return unique_URegularExpression{ uregex_clone(it->second.re.get(), &status) }; + return ICU::unique_uregex{ uregex_clone(it->second.re.get(), &status) }; } } // Even if the URegularExpression creation failed, we'll insert it into the cache, because there's no point in retrying. // (Apart from OOM but in that case this application will crash anyways in 3.. 2.. 1..) - unique_URegularExpression re{ CreateURegularExpression(pattern, 0, &status) }; - unique_URegularExpression clone{ uregex_clone(re.get(), &status) }; + auto re = ICU::CreateRegex(pattern, 0, &status); + ICU::unique_uregex clone{ uregex_clone(re.get(), &status) }; std::wstring key{ pattern }; const auto guard = shared.lock.lock_exclusive(); @@ -1415,7 +1415,7 @@ PointTree Terminal::_getPatterns(til::CoordType beg, til::CoordType end) const PointTree::interval_vector intervals; UErrorCode status = U_ZERO_ERROR; - auto text = UTextFromTextBuffer(_activeBuffer(), beg, end + 1, &status); + auto text = ICU::UTextFromTextBuffer(_activeBuffer(), beg, end + 1, &status); for (size_t i = 0; i < patterns.size(); ++i) { @@ -1426,7 +1426,7 @@ PointTree Terminal::_getPatterns(til::CoordType beg, til::CoordType end) const { do { - auto range = BufferRangeFromMatch(&text, re.get()); + auto range = ICU::BufferRangeFromMatch(&text, re.get()); // PointTree uses half-open ranges. range.end.x++; intervals.push_back(PointTree::interval(range.start, range.end, 0)); From d53d4bd99cf09550a3ea6c5b81ef19abe2fe2d9c Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 22 Aug 2023 22:50:40 +0200 Subject: [PATCH 11/22] Spelling fix --- .github/actions/spelling/allow/apis.txt | 1 + .github/actions/spelling/expect/expect.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/actions/spelling/allow/apis.txt b/.github/actions/spelling/allow/apis.txt index 1f85a1afab2..71b2922ca03 100644 --- a/.github/actions/spelling/allow/apis.txt +++ b/.github/actions/spelling/allow/apis.txt @@ -187,6 +187,7 @@ snprintf spsc sregex SRWLOC +srwlock SRWLOCK STDCPP STDMETHOD diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index acdb6a50b6e..fd5a01dc8f2 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -1300,6 +1300,7 @@ onecoreuuid ONECOREWINDOWS onehalf oneseq +OOM openbash opencode opencon From 1e527c4090ba8488ef98a020c2813393ba80c20e Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 23 Aug 2023 17:09:24 +0200 Subject: [PATCH 12/22] Remove selection coloring from Search --- src/buffer/out/search.cpp | 13 ---------- src/buffer/out/search.h | 3 --- src/cascadia/TerminalCore/Terminal.cpp | 24 +++++++++++++------ src/cascadia/TerminalCore/Terminal.hpp | 1 - .../TerminalCore/TerminalSelection.cpp | 13 ---------- src/host/renderData.cpp | 13 ---------- src/host/renderData.hpp | 1 - src/host/selectionInput.cpp | 8 +++++-- src/host/ut_host/VtIoTests.cpp | 4 ---- src/renderer/inc/IRenderData.hpp | 1 - 10 files changed, 23 insertions(+), 58 deletions(-) diff --git a/src/buffer/out/search.cpp b/src/buffer/out/search.cpp index 1735872ae10..2c38c80ae23 100644 --- a/src/buffer/out/search.cpp +++ b/src/buffer/out/search.cpp @@ -5,7 +5,6 @@ #include "search.h" #include "textBuffer.hpp" -#include "../types/inc/GlyphWidth.hpp" using namespace Microsoft::Console::Types; @@ -85,15 +84,3 @@ bool Search::SelectNext() _renderData->SelectNewRegion(selStart, selEnd); return true; } - -// Routine Description: -// - Applies the supplied TextAttribute to all search results. -// Arguments: -// - attr - The attribute to apply to the result -void Search::ColorAll(const TextAttribute& attr) const -{ - for (const auto& s : _results) - { - _renderData->ColorSelection(s.start, s.end, attr); - } -} diff --git a/src/buffer/out/search.h b/src/buffer/out/search.h index 67a75ba3e45..6adb2be9bd3 100644 --- a/src/buffer/out/search.h +++ b/src/buffer/out/search.h @@ -17,7 +17,6 @@ Revision History: #pragma once -#include "TextAttribute.hpp" #include "textBuffer.hpp" #include "../renderer/inc/IRenderData.hpp" @@ -30,8 +29,6 @@ class Search final bool IsStale() const noexcept; bool SelectNext(); - void ColorAll(const TextAttribute& attr) const; - private: // _renderData is a pointer so that Search() is constexpr default constructable. Microsoft::Console::Render::IRenderData* _renderData = nullptr; diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index 9259ff069e0..a8602a5681a 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -1552,28 +1552,38 @@ std::wstring_view Terminal::CurrentCommand() const void Terminal::ColorSelection(const TextAttribute& attr, winrt::Microsoft::Terminal::Core::MatchMode matchMode) { + const auto colorSelection = [this](const til::point coordStart, const til::point coordEnd, const TextAttribute& attr) { + auto& textBuffer = _activeBuffer(); + const auto spanLength = textBuffer.SpanLength(coordStart, coordEnd); + textBuffer.Write(OutputCellIterator(attr, spanLength), coordStart); + }; + for (const auto [start, end] : _GetSelectionSpans()) { try { if (matchMode == winrt::Microsoft::Terminal::Core::MatchMode::None) { - ColorSelection(start, end, attr); + colorSelection(start, end, attr); } else if (matchMode == winrt::Microsoft::Terminal::Core::MatchMode::All) { - const auto textBuffer = _activeBuffer().GetPlainText(start, end); - std::wstring_view text{ textBuffer }; + const auto& textBuffer = _activeBuffer(); + const auto text = textBuffer.GetPlainText(start, end); + std::wstring_view textView{ text }; if (IsBlockSelection()) { - text = Utils::TrimPaste(text); + textView = Utils::TrimPaste(textView); } - if (!text.empty()) + if (!textView.empty()) { - const Search search(*this, text, false, true); - search.ColorAll(attr); + const auto hits = textBuffer.SearchText(textView, true); + for (const auto& s : hits) + { + colorSelection(s.start, s.end, attr); + } } } } diff --git a/src/cascadia/TerminalCore/Terminal.hpp b/src/cascadia/TerminalCore/Terminal.hpp index 8a76201822c..ac5a18ec91f 100644 --- a/src/cascadia/TerminalCore/Terminal.hpp +++ b/src/cascadia/TerminalCore/Terminal.hpp @@ -216,7 +216,6 @@ class Microsoft::Terminal::Core::Terminal final : const til::point GetSelectionAnchor() const noexcept override; const til::point GetSelectionEnd() const noexcept override; const std::wstring_view GetConsoleTitle() const noexcept override; - void ColorSelection(const til::point coordSelectionStart, const til::point coordSelectionEnd, const TextAttribute) override; const bool IsUiaDataInitialized() const noexcept override; #pragma endregion diff --git a/src/cascadia/TerminalCore/TerminalSelection.cpp b/src/cascadia/TerminalCore/TerminalSelection.cpp index 110e2ebbaea..5f89c533896 100644 --- a/src/cascadia/TerminalCore/TerminalSelection.cpp +++ b/src/cascadia/TerminalCore/TerminalSelection.cpp @@ -887,16 +887,3 @@ void Terminal::_ScrollToPoint(const til::point pos) _activeBuffer().TriggerScroll(); } } - -// Method Description: -// - apply the TextAttribute "attr" to the active buffer -// Arguments: -// - coordStart - where to begin applying attr -// - coordEnd - where to end applying attr (inclusive) -// - attr - the text attributes to apply -void Terminal::ColorSelection(const til::point coordStart, const til::point coordEnd, const TextAttribute attr) -{ - const auto spanLength = _activeBuffer().SpanLength(coordStart, coordEnd); - - _activeBuffer().Write(OutputCellIterator(attr, spanLength), coordStart); -} diff --git a/src/host/renderData.cpp b/src/host/renderData.cpp index 1f1dadda001..d327c0b9d61 100644 --- a/src/host/renderData.cpp +++ b/src/host/renderData.cpp @@ -418,16 +418,3 @@ const til::point RenderData::GetSelectionEnd() const noexcept return { x_pos, y_pos }; } - -// Routine Description: -// - Given two points in the buffer space, color the selection between the two with the given attribute. -// - This will create an internal selection rectangle covering the two points, assume a line selection, -// and use the first point as the anchor for the selection (as if the mouse click started at that point) -// Arguments: -// - coordSelectionStart - Anchor point (start of selection) for the region to be colored -// - coordSelectionEnd - Other point referencing the rectangle inscribing the selection area -// - attr - Color to apply to region. -void RenderData::ColorSelection(const til::point coordSelectionStart, const til::point coordSelectionEnd, const TextAttribute attr) -{ - Selection::Instance().ColorSelection(coordSelectionStart, coordSelectionEnd, attr); -} diff --git a/src/host/renderData.hpp b/src/host/renderData.hpp index 4d796812a81..5e649353cd7 100644 --- a/src/host/renderData.hpp +++ b/src/host/renderData.hpp @@ -56,6 +56,5 @@ class RenderData final : void SelectNewRegion(const til::point coordStart, const til::point coordEnd) override; const til::point GetSelectionAnchor() const noexcept override; const til::point GetSelectionEnd() const noexcept override; - void ColorSelection(const til::point coordSelectionStart, const til::point coordSelectionEnd, const TextAttribute attr) override; const bool IsUiaDataInitialized() const noexcept override { return true; } }; diff --git a/src/host/selectionInput.cpp b/src/host/selectionInput.cpp index 37ad00c96a6..69d40b9ae79 100644 --- a/src/host/selectionInput.cpp +++ b/src/host/selectionInput.cpp @@ -708,8 +708,12 @@ bool Selection::_HandleColorSelection(const INPUT_KEY_INFO* const pInputKeyInfo) Telemetry::Instance().LogColorSelectionUsed(); - const Search search(gci.renderData, str, false, true); - search.ColorAll(selectionAttr); + const auto& textBuffer = gci.renderData.GetTextBuffer(); + const auto hits = textBuffer.SearchText(str, true); + for (const auto& s : hits) + { + ColorSelection(s.start, s.end, selectionAttr); + } } } CATCH_LOG(); diff --git a/src/host/ut_host/VtIoTests.cpp b/src/host/ut_host/VtIoTests.cpp index 329dd3f46ec..902f711771d 100644 --- a/src/host/ut_host/VtIoTests.cpp +++ b/src/host/ut_host/VtIoTests.cpp @@ -373,10 +373,6 @@ class MockRenderData : public IRenderData return {}; } - void ColorSelection(const til::point /*coordSelectionStart*/, const til::point /*coordSelectionEnd*/, const TextAttribute /*attr*/) - { - } - const bool IsUiaDataInitialized() const noexcept { return true; diff --git a/src/renderer/inc/IRenderData.hpp b/src/renderer/inc/IRenderData.hpp index e45dc388ac7..69e0dc2a0e6 100644 --- a/src/renderer/inc/IRenderData.hpp +++ b/src/renderer/inc/IRenderData.hpp @@ -73,7 +73,6 @@ namespace Microsoft::Console::Render virtual void SelectNewRegion(const til::point coordStart, const til::point coordEnd) = 0; virtual const til::point GetSelectionAnchor() const noexcept = 0; virtual const til::point GetSelectionEnd() const noexcept = 0; - virtual void ColorSelection(const til::point coordSelectionStart, const til::point coordSelectionEnd, const TextAttribute attr) = 0; virtual const bool IsUiaDataInitialized() const noexcept = 0; }; } From 1fc01d8197eb2fe87e2cfe096e219344c24d5944 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 23 Aug 2023 17:09:39 +0200 Subject: [PATCH 13/22] Address Dustin's feedback --- src/buffer/out/search.cpp | 4 ++-- src/buffer/out/search.h | 2 +- src/buffer/out/textBuffer.cpp | 12 ++++++------ src/buffer/out/textBuffer.hpp | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/buffer/out/search.cpp b/src/buffer/out/search.cpp index 2c38c80ae23..b1b7cb15296 100644 --- a/src/buffer/out/search.cpp +++ b/src/buffer/out/search.cpp @@ -25,7 +25,7 @@ Search::Search(Microsoft::Console::Render::IRenderData& renderData, const std::w const auto& textBuffer = _renderData->GetTextBuffer(); _results = textBuffer.SearchText(str, caseInsensitive); - _mutationCount = textBuffer.GetMutationCount(); + _lastMutationId = textBuffer.GetLastMutationId(); if (_results.empty()) { @@ -58,7 +58,7 @@ Search::Search(Microsoft::Console::Render::IRenderData& renderData, const std::w bool Search::IsStale() const noexcept { - return !_renderData || _renderData->GetTextBuffer().GetMutationCount() != _mutationCount; + return !_renderData || _renderData->GetTextBuffer().GetLastMutationId() != _lastMutationId; } // Routine Description: diff --git a/src/buffer/out/search.h b/src/buffer/out/search.h index 6adb2be9bd3..d5fbf41f4a8 100644 --- a/src/buffer/out/search.h +++ b/src/buffer/out/search.h @@ -35,5 +35,5 @@ class Search final std::vector _results; ptrdiff_t _index = 0; ptrdiff_t _step = 0; - uint64_t _mutationCount = 0; + uint64_t _lastMutationId = 0; }; diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index 24762954632..48883217c55 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -31,7 +31,7 @@ constexpr bool allWhitespace(const std::wstring_view& text) noexcept return true; } -static std::atomic s_mutationCountInitialValue; +static std::atomic s_lastMutationIdInitialValue; // Routine Description: // - Creates a new instance of TextBuffer @@ -51,9 +51,9 @@ TextBuffer::TextBuffer(til::size screenBufferSize, Microsoft::Console::Render::Renderer& renderer) : _renderer{ renderer }, _currentAttributes{ defaultAttributes }, - // This way every TextBuffer will start with a ""unique"" _mutationCount + // This way every TextBuffer will start with a ""unique"" _lastMutationId // and so it'll compare unequal with the counter of other TextBuffers. - _mutationCount{ s_mutationCountInitialValue.fetch_add(0x100000000) }, + _lastMutationId{ s_lastMutationIdInitialValue.fetch_add(0x100000000) }, _cursor{ cursorSize, *this }, _isActiveBuffer{ isActiveBuffer } { @@ -225,7 +225,7 @@ const ROW& TextBuffer::GetRowByOffset(const til::CoordType index) const // (what corresponds to the top row of the screen buffer). ROW& TextBuffer::GetMutableRowByOffset(const til::CoordType index) { - _mutationCount++; + _lastMutationId++; return _getRow(index); } @@ -907,9 +907,9 @@ const Cursor& TextBuffer::GetCursor() const noexcept return _cursor; } -uint64_t TextBuffer::GetMutationCount() const noexcept +uint64_t TextBuffer::GetLastMutationId() const noexcept { - return _mutationCount; + return _lastMutationId; } const TextAttribute& TextBuffer::GetCurrentAttributes() const noexcept diff --git a/src/buffer/out/textBuffer.hpp b/src/buffer/out/textBuffer.hpp index 19744b1bed4..a57474b6b81 100644 --- a/src/buffer/out/textBuffer.hpp +++ b/src/buffer/out/textBuffer.hpp @@ -166,7 +166,7 @@ class TextBuffer final Cursor& GetCursor() noexcept; const Cursor& GetCursor() const noexcept; - uint64_t GetMutationCount() const noexcept; + uint64_t GetLastMutationId() const noexcept; const til::CoordType GetFirstRowIndex() const noexcept; const Microsoft::Console::Types::Viewport GetSize() const noexcept; @@ -371,7 +371,7 @@ class TextBuffer final TextAttribute _currentAttributes; til::CoordType _firstRow = 0; // indexes top row (not necessarily 0) - uint64_t _mutationCount = 0; + uint64_t _lastMutationId = 0; Cursor _cursor; std::vector _marks; From 7cccf568a7381cb001a4cba7757f300026eb27a3 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 23 Aug 2023 17:09:50 +0200 Subject: [PATCH 14/22] Optimize GetReadableColumnCount --- src/buffer/out/Row.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 649825267bf..73d98c3a08a 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -215,10 +215,11 @@ LineRendition ROW::GetLineRendition() const noexcept // Console APIs treat the buffer as a large NxM matrix after all. til::CoordType ROW::GetReadableColumnCount() const noexcept { - const til::CoordType columnCount = _columnCount; - const til::CoordType scale = _lineRendition != LineRendition::SingleWidth; - const til::CoordType padding = _doubleBytePadded; - return (columnCount - padding) >> scale; + if (_lineRendition == LineRendition::SingleWidth) [[likely]] + { + return _columnCount - _doubleBytePadded; + } + return (_columnCount - (_doubleBytePadded << 1)) >> 1; } // Routine Description: From 362693b3269ee8611b2600b12cf8e7a85e0576a3 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 23 Aug 2023 17:12:24 +0200 Subject: [PATCH 15/22] Fix AuditMode failures --- src/buffer/out/UTextAdapter.cpp | 4 +--- src/buffer/out/UTextAdapter.h | 2 +- src/buffer/out/textBuffer.cpp | 4 ++-- src/cascadia/TerminalCore/Terminal.cpp | 5 ++--- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index 986a0de446c..97f04a88fcf 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -264,10 +264,8 @@ static constexpr UTextFuncs utextFuncs{ }; // Creates a UText from the given TextBuffer that spans rows [rowBeg,RowEnd). -UText Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept +UText Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept { - __assume(status != nullptr); - UText ut = UTEXT_INITIALIZER; ut.providerProperties = (1 << UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE) | (1 << UTEXT_PROVIDER_STABLE_CHUNKS); ut.pFuncs = &utextFuncs; diff --git a/src/buffer/out/UTextAdapter.h b/src/buffer/out/UTextAdapter.h index 158fd25ca2d..c8c325143ef 100644 --- a/src/buffer/out/UTextAdapter.h +++ b/src/buffer/out/UTextAdapter.h @@ -11,7 +11,7 @@ namespace Microsoft::Console::ICU { using unique_uregex = wistd::unique_ptr>; - UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd, UErrorCode* status) noexcept; + UText UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept; unique_uregex CreateRegex(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept; til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re); } diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index 48883217c55..f5bfdeda3ce 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -2766,12 +2766,12 @@ std::vector TextBuffer::SearchText(const std::wstring_view& nee return results; } + auto text = ICU::UTextFromTextBuffer(*this, rowBeg, rowEnd); + uint32_t flags = UREGEX_LITERAL; WI_SetFlagIf(flags, UREGEX_CASE_INSENSITIVE, caseInsensitive); UErrorCode status = U_ZERO_ERROR; - auto text = ICU::UTextFromTextBuffer(*this, rowBeg, rowEnd, &status); - const auto re = ICU::CreateRegex(needle, flags, &status); uregex_setUText(re.get(), &text, &status); diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index a8602a5681a..c64160a29aa 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -1412,10 +1412,9 @@ PointTree Terminal::_getPatterns(til::CoordType beg, til::CoordType end) const LR"(\b(?:https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|$!:,.;]*[A-Za-z0-9+&@#/%=~_|$])", }; - PointTree::interval_vector intervals; - + auto text = ICU::UTextFromTextBuffer(_activeBuffer(), beg, end + 1); UErrorCode status = U_ZERO_ERROR; - auto text = ICU::UTextFromTextBuffer(_activeBuffer(), beg, end + 1, &status); + PointTree::interval_vector intervals; for (size_t i = 0; i < patterns.size(); ++i) { From bf419ca4b9a20fee8ba03e251e95007bfb018e78 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 24 Aug 2023 19:26:44 +0200 Subject: [PATCH 16/22] Cache search parameters in Search, Improve UIA search --- src/buffer/out/search.cpp | 114 +++++++++++-------- src/buffer/out/search.h | 17 ++- src/cascadia/TerminalControl/ControlCore.cpp | 13 ++- src/cascadia/TerminalControl/ControlCore.h | 3 - src/interactivity/win32/find.cpp | 39 +++---- src/types/UiaTextRangeBase.cpp | 75 ++++-------- src/types/UiaTextRangeBase.hpp | 14 +-- 7 files changed, 127 insertions(+), 148 deletions(-) diff --git a/src/buffer/out/search.cpp b/src/buffer/out/search.cpp index b1b7cb15296..afeb03f9cb1 100644 --- a/src/buffer/out/search.cpp +++ b/src/buffer/out/search.cpp @@ -8,79 +8,97 @@ using namespace Microsoft::Console::Types; -// Routine Description: -// - Constructs a Search object. -// - Make a Search object then call .FindNext() to locate items. -// - Once you've found something, you can perform actions like .Select() or .Color() -// Arguments: -// - textBuffer - The screen text buffer to search through (the "haystack") -// - renderData - The IRenderData type reference, it is for providing selection methods -// - str - The search term you want to find (the "needle") -// - reverse - True when searching backward/upwards in the buffer -// - caseInsensitive - As the name indicates: case insensitivity -Search::Search(Microsoft::Console::Render::IRenderData& renderData, const std::wstring_view str, bool reverse, bool caseInsensitive) : - _renderData{ &renderData }, - _step{ reverse ? -1 : 1 } +bool Search::ResetIfStale(Microsoft::Console::Render::IRenderData& renderData, const std::wstring_view& needle, bool reverse, bool caseInsensitive) { - const auto& textBuffer = _renderData->GetTextBuffer(); + const auto& textBuffer = renderData.GetTextBuffer(); + const auto lastMutationId = textBuffer.GetLastMutationId(); + + if (_needle == needle && + _reverse == reverse && + _caseInsensitive == caseInsensitive && + _lastMutationId == lastMutationId) + { + return false; + } + + _renderData = &renderData; + _needle = needle; + _reverse = reverse; + _caseInsensitive = caseInsensitive; + _lastMutationId = lastMutationId; + + _results = textBuffer.SearchText(needle, caseInsensitive); + _index = reverse ? gsl::narrow_cast(_results.size()) - 1 : 0; + _step = reverse ? -1 : 1; + + return true; +} - _results = textBuffer.SearchText(str, caseInsensitive); - _lastMutationId = textBuffer.GetLastMutationId(); +void Search::MovePastCurrentSelection() +{ + if (_renderData->IsSelectionActive()) + { + MovePastPoint(_renderData->GetTextBuffer().ScreenToBufferPosition(_renderData->GetSelectionAnchor())); + } +} +void Search::MovePastPoint(const til::point anchor) +{ if (_results.empty()) { return; } - const auto highestIndex = gsl::narrow_cast(_results.size()) - 1; - auto firstIndex = reverse ? highestIndex : 0; + const auto count = gsl::narrow_cast(_results.size()); + const auto highestIndex = count - 1; + auto index = _reverse ? highestIndex : 0; - if (_renderData->IsSelectionActive()) + if (_reverse) { - const auto anchor = textBuffer.ScreenToBufferPosition(_renderData->GetSelectionAnchor()); - if (reverse) + for (; index >= 0 && til::at(_results, index).start >= anchor; --index) { - for (; firstIndex >= 0 && til::at(_results, firstIndex).start >= anchor; --firstIndex) - { - } } - else + } + else + { + for (; index <= highestIndex && til::at(_results, index).start <= anchor; ++index) { - for (; firstIndex <= highestIndex && til::at(_results, firstIndex).start <= anchor; ++firstIndex) - { - } } } - // We reverse the _index by 1 so that the first call to FindNext() moves it to the firstIndex we found. - _index = firstIndex - _step; + _index = (index + count) % count; } -bool Search::IsStale() const noexcept +void Search::FindNext() { - return !_renderData || _renderData->GetTextBuffer().GetLastMutationId() != _lastMutationId; + const auto count = gsl::narrow_cast(_results.size()); + _index = (_index + _step + count) % count; +} + +const til::point_span* Search::GetCurrent() const noexcept +{ + const auto index = gsl::narrow_cast(_index); + if (index < _results.size()) + { + return &til::at(_results, index); + } + return nullptr; } // Routine Description: // - Takes the found word and selects it in the screen buffer -bool Search::SelectNext() +bool Search::SelectCurrent() const noexcept { - if (_results.empty()) + if (const auto s = GetCurrent()) { - return false; + // Convert buffer selection offsets into the equivalent screen coordinates + // required by SelectNewRegion, taking line renditions into account. + const auto& textBuffer = _renderData->GetTextBuffer(); + const auto selStart = textBuffer.BufferToScreenPosition(s->start); + const auto selEnd = textBuffer.BufferToScreenPosition(s->end); + _renderData->SelectNewRegion(selStart, selEnd); + return true; } - const auto count = gsl::narrow_cast(_results.size()); - const auto& textBuffer = _renderData->GetTextBuffer(); - - _index = (_index + _step + count) % count; - - const auto& s = til::at(_results, _index); - // Convert buffer selection offsets into the equivalent screen coordinates - // required by SelectNewRegion, taking line renditions into account. - const auto selStart = textBuffer.BufferToScreenPosition(s.start); - const auto selEnd = textBuffer.BufferToScreenPosition(s.end); - - _renderData->SelectNewRegion(selStart, selEnd); - return true; + return false; } diff --git a/src/buffer/out/search.h b/src/buffer/out/search.h index d5fbf41f4a8..2db4b047b8a 100644 --- a/src/buffer/out/search.h +++ b/src/buffer/out/search.h @@ -24,16 +24,25 @@ class Search final { public: Search() = default; - Search(Microsoft::Console::Render::IRenderData& renderData, std::wstring_view str, bool reverse, bool caseInsensitive); - bool IsStale() const noexcept; - bool SelectNext(); + bool ResetIfStale(Microsoft::Console::Render::IRenderData& renderData, const std::wstring_view& needle, bool reverse, bool caseInsensitive); + + void MovePastCurrentSelection(); + void MovePastPoint(til::point anchor); + void FindNext(); + + const til::point_span* GetCurrent() const noexcept; + bool SelectCurrent() const noexcept; private: // _renderData is a pointer so that Search() is constexpr default constructable. Microsoft::Console::Render::IRenderData* _renderData = nullptr; + std::wstring_view _needle; + bool _reverse = false; + bool _caseInsensitive = false; + uint64_t _lastMutationId = 0; + std::vector _results; ptrdiff_t _index = 0; ptrdiff_t _step = 0; - uint64_t _lastMutationId = 0; }; diff --git a/src/cascadia/TerminalControl/ControlCore.cpp b/src/cascadia/TerminalControl/ControlCore.cpp index 3e0a9fe7e9c..d82dcf43edf 100644 --- a/src/cascadia/TerminalControl/ControlCore.cpp +++ b/src/cascadia/TerminalControl/ControlCore.cpp @@ -1543,15 +1543,16 @@ namespace winrt::Microsoft::Terminal::Control::implementation { auto lock = _terminal->LockForWriting(); - if (_searcher.IsStale() || _searcherText != text || _searcherGoForward != goForward || _searcherCaseSensitive != caseSensitive) + if (_searcher.ResetIfStale(*GetRenderData(), text, !goForward, !caseSensitive)) { - _searcher = { *GetRenderData(), text, !goForward, !caseSensitive }; - _searcherText = text; - _searcherGoForward = goForward; - _searcherCaseSensitive = caseSensitive; + _searcher.MovePastCurrentSelection(); + } + else + { + _searcher.FindNext(); } - const auto foundMatch = _searcher.SelectNext(); + const auto foundMatch = _searcher.SelectCurrent(); if (foundMatch) { // this is used for search, diff --git a/src/cascadia/TerminalControl/ControlCore.h b/src/cascadia/TerminalControl/ControlCore.h index 88b7c6d1ee1..9ad2a3a2fe2 100644 --- a/src/cascadia/TerminalControl/ControlCore.h +++ b/src/cascadia/TerminalControl/ControlCore.h @@ -305,9 +305,6 @@ namespace winrt::Microsoft::Terminal::Control::implementation std::unique_ptr<::Microsoft::Console::Render::Renderer> _renderer{ nullptr }; ::Search _searcher; - winrt::hstring _searcherText; - bool _searcherGoForward = false; - bool _searcherCaseSensitive = false; winrt::handle _lastSwapChainHandle{ nullptr }; diff --git a/src/interactivity/win32/find.cpp b/src/interactivity/win32/find.cpp index a0d949130b7..93c4d55a8b4 100644 --- a/src/interactivity/win32/find.cpp +++ b/src/interactivity/win32/find.cpp @@ -41,23 +41,27 @@ INT_PTR CALLBACK FindDialogProc(HWND hWnd, UINT Message, WPARAM wParam, LPARAM l { case IDOK: { + auto length = SendDlgItemMessageW(hWnd, ID_CONSOLE_FINDSTR, WM_GETTEXTLENGTH, 0, 0); + lastFindString.resize(length); + length = GetDlgItemTextW(hWnd, ID_CONSOLE_FINDSTR, lastFindString.data(), gsl::narrow_cast(length + 1)); + lastFindString.resize(length); + + caseInsensitive = IsDlgButtonChecked(hWnd, ID_CONSOLE_FINDCASE) == 0; + reverse = IsDlgButtonChecked(hWnd, ID_CONSOLE_FINDDOWN) == 0; + LockConsole(); auto Unlock = wil::scope_exit([&] { UnlockConsole(); }); - if (searcher.IsStale()) + if (searcher.ResetIfStale(gci.renderData, lastFindString, reverse, caseInsensitive)) { - auto length = SendDlgItemMessageW(hWnd, ID_CONSOLE_FINDSTR, WM_GETTEXTLENGTH, 0, 0); - lastFindString.resize(length); - length = GetDlgItemTextW(hWnd, ID_CONSOLE_FINDSTR, lastFindString.data(), gsl::narrow_cast(length + 1)); - lastFindString.resize(length); - - caseInsensitive = IsDlgButtonChecked(hWnd, ID_CONSOLE_FINDCASE) == 0; - reverse = IsDlgButtonChecked(hWnd, ID_CONSOLE_FINDDOWN) == 0; - - searcher = Search{ gci.renderData, lastFindString, reverse, caseInsensitive }; + searcher.MovePastCurrentSelection(); + } + else + { + searcher.FindNext(); } - if (searcher.SelectNext()) + if (searcher.SelectCurrent()) { return TRUE; } @@ -69,19 +73,6 @@ INT_PTR CALLBACK FindDialogProc(HWND hWnd, UINT Message, WPARAM wParam, LPARAM l EndDialog(hWnd, 0); searcher = Search{}; return TRUE; - case ID_CONSOLE_FINDSTR: - case ID_CONSOLE_FINDCASE: - case ID_CONSOLE_FINDUP: - case ID_CONSOLE_FINDDOWN: - { - const auto hi = HIWORD(wParam); - // ID_CONSOLE_FINDSTR emits EN_CHANGE and the other 3 emit 0 when changing. - if (hi == 0 || hi == EN_CHANGE) - { - searcher = Search{}; - } - break; - } default: break; } diff --git a/src/types/UiaTextRangeBase.cpp b/src/types/UiaTextRangeBase.cpp index e3f9a0dde62..c7312ea9778 100644 --- a/src/types/UiaTextRangeBase.cpp +++ b/src/types/UiaTextRangeBase.cpp @@ -3,8 +3,7 @@ #include "precomp.h" #include "UiaTextRangeBase.hpp" -#include "ScreenInfoUiaProviderBase.h" -#include "../buffer/out/search.h" + #include "UiaTracing.h" using namespace Microsoft::Console::Types; @@ -599,74 +598,42 @@ IFACEMETHODIMP UiaTextRangeBase::FindText(_In_ BSTR text, _Outptr_result_maybenull_ ITextRangeProvider** ppRetVal) noexcept try { - RETURN_HR_IF(E_INVALIDARG, ppRetVal == nullptr); + THROW_HR_IF(E_INVALIDARG, ppRetVal == nullptr); *ppRetVal = nullptr; _pData->LockConsole(); auto Unlock = wil::scope_exit([&]() noexcept { _pData->UnlockConsole(); }); - RETURN_HR_IF(E_FAIL, !_pData->IsUiaDataInitialized()); + THROW_HR_IF(E_FAIL, !_pData->IsUiaDataInitialized()); const std::wstring_view queryText{ text, SysStringLen(text) }; - const auto results = _pData->GetTextBuffer().SearchText(queryText, ignoreCase != FALSE, _start.y, _end.y + 1); - if (results.empty()) - { - return S_OK; - } + auto exclusiveBegin = _start; - const auto highestIndex = results.size() - 1; - const til::point_span* hit = nullptr; + // MovePastPoint() moves *past* the given point. + // -> We need to turn [_beg,_end) into (_beg,_end). + exclusiveBegin.x--; - if (searchBackward) - { - // Find the last result that is in the [_start,_end) range. - for (size_t i = highestIndex;; --i) - { - hit = &til::at(results, i); - if (hit->end < _end) - { - break; - } + _searcher.ResetIfStale(*_pData, queryText, searchBackward, ignoreCase); + _searcher.MovePastPoint(searchBackward ? _end : exclusiveBegin); - // ...but exit when none are found. - if (i <= 0) - { - return S_OK; - } - } - } - else - { - // Find the first result that is in the [_start,_end) range. - for (size_t i = 0;; ++i) - { - hit = &til::at(results, i); - if (hit->start >= _start) - { - break; - } + til::point hitBeg{ til::CoordTypeMax, til::CoordTypeMax }; + til::point hitEnd{ til::CoordTypeMin, til::CoordTypeMin }; - // ...but exit when none are found. - if (i >= highestIndex) - { - return S_OK; - } - } + if (const auto hit = _searcher.GetCurrent()) + { + hitBeg = hit->start; + hitEnd = hit->end; + // we need to increment the position of end because it's exclusive + _pData->GetTextBuffer().GetSize().IncrementInBounds(hitEnd, true); } - const auto start = hit->start; - auto end = hit->end; - - // we need to increment the position of end because it's exclusive - _getOptimizedBufferSize().IncrementInBounds(end, true); - - if (start >= _start && end <= _end) + if (hitBeg >= _start && hitEnd <= _end) { - RETURN_IF_FAILED(Clone(ppRetVal)); + THROW_IF_FAILED(Clone(ppRetVal)); auto& range = static_cast(**ppRetVal); - range._start = start; - range._end = end; + range._start = hitBeg; + range._end = hitEnd; UiaTracing::TextRange::FindText(*this, queryText, searchBackward, ignoreCase, range); } diff --git a/src/types/UiaTextRangeBase.hpp b/src/types/UiaTextRangeBase.hpp index c69ff5b4a73..eec30a3dcbd 100644 --- a/src/types/UiaTextRangeBase.hpp +++ b/src/types/UiaTextRangeBase.hpp @@ -18,16 +18,11 @@ Author(s): #pragma once -#include "inc/viewport.hpp" -#include "../buffer/out/textBuffer.hpp" -#include "../renderer/inc/IRenderData.hpp" -#include "unicode.hpp" -#include "IUiaTraceable.h" - #include -#include -#include -#include + +#include "IUiaTraceable.h" +#include "unicode.hpp" +#include "../buffer/out/search.h" #ifdef UNIT_TESTING class UiaTextRangeTests; @@ -126,6 +121,7 @@ namespace Microsoft::Console::Types IRawElementProviderSimple* _pProvider{ nullptr }; std::wstring _wordDelimiters{}; + ::Search _searcher; virtual void _TranslatePointToScreen(til::point* clientPoint) const = 0; virtual void _TranslatePointFromScreen(til::point* screenPoint) const = 0; From 801fc76b71af3365b67e24fad48ffec1297ffc48 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 24 Aug 2023 22:13:15 +0200 Subject: [PATCH 17/22] Revert unnecessary changes, Fix tests --- src/host/ut_host/SearchTests.cpp | 15 +++++++++++---- src/types/UiaTextRangeBase.cpp | 6 +++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/host/ut_host/SearchTests.cpp b/src/host/ut_host/SearchTests.cpp index d786cd0ddd2..a47b8fe32b1 100644 --- a/src/host/ut_host/SearchTests.cpp +++ b/src/host/ut_host/SearchTests.cpp @@ -64,25 +64,32 @@ class SearchTests auto coordEndExpected = coordStartExpected; coordEndExpected.x += 1; - VERIFY_IS_TRUE(s.SelectNext()); + s.FindNext(); + VERIFY_IS_TRUE(s.SelectCurrent()); VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); coordStartExpected.y += lineDelta; coordEndExpected.y += lineDelta; - VERIFY_IS_TRUE(s.SelectNext()); + + s.FindNext(); + VERIFY_IS_TRUE(s.SelectCurrent()); VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); coordStartExpected.y += lineDelta; coordEndExpected.y += lineDelta; - VERIFY_IS_TRUE(s.SelectNext()); + + s.FindNext(); + VERIFY_IS_TRUE(s.SelectCurrent()); VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); coordStartExpected.y += lineDelta; coordEndExpected.y += lineDelta; - VERIFY_IS_TRUE(s.SelectNext()); + + s.FindNext(); + VERIFY_IS_TRUE(s.SelectCurrent()); VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); } diff --git a/src/types/UiaTextRangeBase.cpp b/src/types/UiaTextRangeBase.cpp index c7312ea9778..321b86756fb 100644 --- a/src/types/UiaTextRangeBase.cpp +++ b/src/types/UiaTextRangeBase.cpp @@ -598,14 +598,14 @@ IFACEMETHODIMP UiaTextRangeBase::FindText(_In_ BSTR text, _Outptr_result_maybenull_ ITextRangeProvider** ppRetVal) noexcept try { - THROW_HR_IF(E_INVALIDARG, ppRetVal == nullptr); + RETURN_HR_IF(E_INVALIDARG, ppRetVal == nullptr); *ppRetVal = nullptr; _pData->LockConsole(); auto Unlock = wil::scope_exit([&]() noexcept { _pData->UnlockConsole(); }); - THROW_HR_IF(E_FAIL, !_pData->IsUiaDataInitialized()); + RETURN_HR_IF(E_FAIL, !_pData->IsUiaDataInitialized()); const std::wstring_view queryText{ text, SysStringLen(text) }; auto exclusiveBegin = _start; @@ -630,7 +630,7 @@ try if (hitBeg >= _start && hitEnd <= _end) { - THROW_IF_FAILED(Clone(ppRetVal)); + RETURN_IF_FAILED(Clone(ppRetVal)); auto& range = static_cast(**ppRetVal); range._start = hitBeg; range._end = hitEnd; From 078c7be734b156a0480d30d24382f60e4eea982f Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 24 Aug 2023 23:44:27 +0200 Subject: [PATCH 18/22] Turn internURegularExpression into URegularExpressionInterner --- src/buffer/out/Row.cpp | 6 +- src/cascadia/TerminalCore/Terminal.cpp | 90 ++++++++++++++------------ 2 files changed, 50 insertions(+), 46 deletions(-) diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp index 73d98c3a08a..6c1c136e947 100644 --- a/src/buffer/out/Row.cpp +++ b/src/buffer/out/Row.cpp @@ -1034,14 +1034,12 @@ std::wstring_view ROW::GetText(til::CoordType columnBegin, til::CoordType column til::CoordType ROW::GetLeadingColumnAtCharOffset(const ptrdiff_t offset) const noexcept { - auto mapper = _createCharToColumnMapper(offset); - return mapper.GetLeadingColumnAt(offset); + return _createCharToColumnMapper(offset).GetLeadingColumnAt(offset); } til::CoordType ROW::GetTrailingColumnAtCharOffset(const ptrdiff_t offset) const noexcept { - auto mapper = _createCharToColumnMapper(offset); - return mapper.GetTrailingColumnAt(offset); + return _createCharToColumnMapper(offset).GetTrailingColumnAt(offset); } DelimiterClass ROW::DelimiterClassAt(til::CoordType column, const std::wstring_view& wordDelimiters) const noexcept diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index c64160a29aa..7134fac26eb 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -1345,9 +1345,49 @@ void Terminal::_updateUrlDetection() } } -// Interns URegularExpression instances so that they can be reused. This method is thread-safe. -static ICU::unique_uregex internURegularExpression(const std::wstring_view& pattern) +struct URegularExpressionInterner { + // Interns (caches) URegularExpression instances so that they can be reused. This method is thread-safe. + // uregex_open is not terribly expensive at ~10us/op, but it's also much more expensive than uregex_clone + // at ~400ns/op and would effectively double the time it takes to scan the viewport for patterns. + // + // An alternative approach would be to not make this method thread-safe and give each + // Terminal instance its own cache. I'm not sure which approach would've been better. + ICU::unique_uregex Intern(const std::wstring_view& pattern) + { + UErrorCode status = U_ZERO_ERROR; + + { + const auto guard = _lock.lock_shared(); + if (const auto it = _cache.find(pattern); it != _cache.end()) + { + return ICU::unique_uregex{ uregex_clone(it->second.re.get(), &status) }; + } + } + + // Even if the URegularExpression creation failed, we'll insert it into the cache, because there's no point in retrying. + // (Apart from OOM but in that case this application will crash anyways in 3.. 2.. 1..) + auto re = ICU::CreateRegex(pattern, 0, &status); + ICU::unique_uregex clone{ uregex_clone(re.get(), &status) }; + std::wstring key{ pattern }; + + const auto guard = _lock.lock_exclusive(); + + _cache.insert_or_assign(std::move(key), CacheValue{ std::move(re), _totalInsertions }); + _totalInsertions++; + + // If the cache is full remove the oldest element (oldest = lowest generation, just like with humans). + if (_cache.size() > cacheSizeLimit) + { + _cache.erase(std::min_element(_cache.begin(), _cache.end(), [](const auto& it, const auto& smallest) { + return it.second.generation < smallest.second.generation; + })); + } + + return clone; + } + +private: struct CacheValue { ICU::unique_uregex re; @@ -1364,47 +1404,13 @@ static ICU::unique_uregex internURegularExpression(const std::wstring_view& patt } }; - struct SharedState - { - wil::srwlock lock; - std::unordered_map> set; - size_t totalInsertions = 0; - }; - - static SharedState shared; static constexpr size_t cacheSizeLimit = 128; + wil::srwlock _lock; + std::unordered_map> _cache; + size_t _totalInsertions = 0; +}; - UErrorCode status = U_ZERO_ERROR; - - { - const auto guard = shared.lock.lock_shared(); - if (const auto it = shared.set.find(pattern); it != shared.set.end()) - { - return ICU::unique_uregex{ uregex_clone(it->second.re.get(), &status) }; - } - } - - // Even if the URegularExpression creation failed, we'll insert it into the cache, because there's no point in retrying. - // (Apart from OOM but in that case this application will crash anyways in 3.. 2.. 1..) - auto re = ICU::CreateRegex(pattern, 0, &status); - ICU::unique_uregex clone{ uregex_clone(re.get(), &status) }; - std::wstring key{ pattern }; - - const auto guard = shared.lock.lock_exclusive(); - - shared.set.insert_or_assign(std::move(key), CacheValue{ std::move(re), shared.totalInsertions }); - shared.totalInsertions++; - - // If the cache is full remove the oldest element (oldest = lowest generation, just like with humans). - if (shared.set.size() > cacheSizeLimit) - { - shared.set.erase(std::min_element(shared.set.begin(), shared.set.end(), [](const auto& it, const auto& smallest) { - return it.second.generation < smallest.second.generation; - })); - } - - return clone; -} +static URegularExpressionInterner uregexInterner; PointTree Terminal::_getPatterns(til::CoordType beg, til::CoordType end) const { @@ -1418,7 +1424,7 @@ PointTree Terminal::_getPatterns(til::CoordType beg, til::CoordType end) const for (size_t i = 0; i < patterns.size(); ++i) { - const auto re = internURegularExpression(patterns[i]); + const auto re = uregexInterner.Intern(patterns[i]); uregex_setUText(re.get(), &text, &status); if (uregex_find(re.get(), -1, &status)) From e7bc823dd294afbc417cf4d4f35fbf6f944965d9 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 24 Aug 2023 23:47:31 +0200 Subject: [PATCH 19/22] Fix AuditMode failures --- src/buffer/out/UTextAdapter.cpp | 1 + src/buffer/out/search.cpp | 6 +++--- src/buffer/out/search.h | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index 97f04a88fcf..7dec8d2dc04 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -266,6 +266,7 @@ static constexpr UTextFuncs utextFuncs{ // Creates a UText from the given TextBuffer that spans rows [rowBeg,RowEnd). UText Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept { +#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47). UText ut = UTEXT_INITIALIZER; ut.providerProperties = (1 << UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE) | (1 << UTEXT_PROVIDER_STABLE_CHUNKS); ut.pFuncs = &utextFuncs; diff --git a/src/buffer/out/search.cpp b/src/buffer/out/search.cpp index afeb03f9cb1..0186110f6cf 100644 --- a/src/buffer/out/search.cpp +++ b/src/buffer/out/search.cpp @@ -42,7 +42,7 @@ void Search::MovePastCurrentSelection() } } -void Search::MovePastPoint(const til::point anchor) +void Search::MovePastPoint(const til::point anchor) noexcept { if (_results.empty()) { @@ -69,7 +69,7 @@ void Search::MovePastPoint(const til::point anchor) _index = (index + count) % count; } -void Search::FindNext() +void Search::FindNext() noexcept { const auto count = gsl::narrow_cast(_results.size()); _index = (_index + _step + count) % count; @@ -87,7 +87,7 @@ const til::point_span* Search::GetCurrent() const noexcept // Routine Description: // - Takes the found word and selects it in the screen buffer -bool Search::SelectCurrent() const noexcept +bool Search::SelectCurrent() const { if (const auto s = GetCurrent()) { diff --git a/src/buffer/out/search.h b/src/buffer/out/search.h index 2db4b047b8a..a337552d59a 100644 --- a/src/buffer/out/search.h +++ b/src/buffer/out/search.h @@ -28,11 +28,11 @@ class Search final bool ResetIfStale(Microsoft::Console::Render::IRenderData& renderData, const std::wstring_view& needle, bool reverse, bool caseInsensitive); void MovePastCurrentSelection(); - void MovePastPoint(til::point anchor); - void FindNext(); + void MovePastPoint(til::point anchor) noexcept; + void FindNext() noexcept; const til::point_span* GetCurrent() const noexcept; - bool SelectCurrent() const noexcept; + bool SelectCurrent() const; private: // _renderData is a pointer so that Search() is constexpr default constructable. From 2b88b6fb41a09bfd6cd12a63df3d7f56b7204b38 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 24 Aug 2023 23:49:48 +0200 Subject: [PATCH 20/22] would've is not a word --- .github/actions/spelling/expect/expect.txt | 1 + src/cascadia/TerminalCore/Terminal.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index fd5a01dc8f2..2e552912cfa 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -906,6 +906,7 @@ INSERTMODE INTERACTIVITYBASE INTERCEPTCOPYPASTE INTERNALNAME +Interner intsafe INVALIDARG INVALIDATERECT diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index 7134fac26eb..e8de1efed86 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -1352,7 +1352,7 @@ struct URegularExpressionInterner // at ~400ns/op and would effectively double the time it takes to scan the viewport for patterns. // // An alternative approach would be to not make this method thread-safe and give each - // Terminal instance its own cache. I'm not sure which approach would've been better. + // Terminal instance its own cache. I'm not sure which approach would have been better. ICU::unique_uregex Intern(const std::wstring_view& pattern) { UErrorCode status = U_ZERO_ERROR; From f9935f7a22bfa2ee98ec8f2b5a9698c9b188ab4e Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 24 Aug 2023 23:57:36 +0200 Subject: [PATCH 21/22] Fix MOAR AuditMode failures --- src/cascadia/TerminalCore/Terminal.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index e8de1efed86..c4e6edc2685 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -1398,7 +1398,7 @@ struct URegularExpressionInterner { using is_transparent = void; - std::size_t operator()(const std::wstring_view& str) const + std::size_t operator()(const std::wstring_view& str) const noexcept { return til::hash(str); } @@ -1424,7 +1424,7 @@ PointTree Terminal::_getPatterns(til::CoordType beg, til::CoordType end) const for (size_t i = 0; i < patterns.size(); ++i) { - const auto re = uregexInterner.Intern(patterns[i]); + const auto re = uregexInterner.Intern(patterns.at(i)); uregex_setUText(re.get(), &text, &status); if (uregex_find(re.get(), -1, &status)) From 1117450e9b5fe4c37587b2c759df34c007b21bd1 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 25 Aug 2023 00:25:48 +0200 Subject: [PATCH 22/22] SearchTests deserve love too, Leonard --- src/host/ut_host/SearchTests.cpp | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/host/ut_host/SearchTests.cpp b/src/host/ut_host/SearchTests.cpp index a47b8fe32b1..5a2e4b9bd6d 100644 --- a/src/host/ut_host/SearchTests.cpp +++ b/src/host/ut_host/SearchTests.cpp @@ -64,31 +64,30 @@ class SearchTests auto coordEndExpected = coordStartExpected; coordEndExpected.x += 1; - s.FindNext(); VERIFY_IS_TRUE(s.SelectCurrent()); VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); coordStartExpected.y += lineDelta; coordEndExpected.y += lineDelta; - s.FindNext(); + VERIFY_IS_TRUE(s.SelectCurrent()); VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); coordStartExpected.y += lineDelta; coordEndExpected.y += lineDelta; - s.FindNext(); + VERIFY_IS_TRUE(s.SelectCurrent()); VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); coordStartExpected.y += lineDelta; coordEndExpected.y += lineDelta; - s.FindNext(); + VERIFY_IS_TRUE(s.SelectCurrent()); VERIFY_ARE_EQUAL(coordStartExpected, gci.renderData.GetSelectionAnchor()); VERIFY_ARE_EQUAL(coordEndExpected, gci.renderData.GetSelectionEnd()); @@ -98,14 +97,16 @@ class SearchTests { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - Search s(gci.renderData, L"AB", false, false); + Search s; + s.ResetIfStale(gci.renderData, L"AB", false, false); DoFoundChecks(s, {}, 1); } TEST_METHOD(ForwardCaseSensitiveJapanese) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - Search s(gci.renderData, L"\x304b", false, false); + Search s; + s.ResetIfStale(gci.renderData, L"\x304b", false, false); DoFoundChecks(s, { 2, 0 }, 1); } @@ -113,42 +114,48 @@ class SearchTests { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - Search s(gci.renderData, L"ab", false, true); + Search s; + s.ResetIfStale(gci.renderData, L"ab", false, true); DoFoundChecks(s, {}, 1); } TEST_METHOD(ForwardCaseInsensitiveJapanese) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - Search s(gci.renderData, L"\x304b", false, true); + Search s; + s.ResetIfStale(gci.renderData, L"\x304b", false, true); DoFoundChecks(s, { 2, 0 }, 1); } TEST_METHOD(BackwardCaseSensitive) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - Search s(gci.renderData, L"AB", true, false); + Search s; + s.ResetIfStale(gci.renderData, L"AB", true, false); DoFoundChecks(s, { 0, 3 }, -1); } TEST_METHOD(BackwardCaseSensitiveJapanese) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - Search s(gci.renderData, L"\x304b", true, false); + Search s; + s.ResetIfStale(gci.renderData, L"\x304b", true, false); DoFoundChecks(s, { 2, 3 }, -1); } TEST_METHOD(BackwardCaseInsensitive) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - Search s(gci.renderData, L"ab", true, true); + Search s; + s.ResetIfStale(gci.renderData, L"ab", true, true); DoFoundChecks(s, { 0, 3 }, -1); } TEST_METHOD(BackwardCaseInsensitiveJapanese) { auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - Search s(gci.renderData, L"\x304b", true, true); + Search s; + s.ResetIfStale(gci.renderData, L"\x304b", true, true); DoFoundChecks(s, { 2, 3 }, -1); } };