From 3d75a48c2d9e867283a00dc0f3dac2de84d22950 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 31 May 2023 16:38:05 +0200 Subject: [PATCH 1/2] Display Unicode URIs side-by-side with their Punycode encoding --- .github/actions/spelling/allow/apis.txt | 1 + .github/actions/spelling/expect/expect.txt | 8 +- .../Resources/en-US/Resources.resw | 4 + src/cascadia/TerminalControl/TermControl.cpp | 111 +++++++++++------- src/cascadia/TerminalControl/TermControl.h | 2 +- 5 files changed, 79 insertions(+), 47 deletions(-) diff --git a/.github/actions/spelling/allow/apis.txt b/.github/actions/spelling/allow/apis.txt index 38ef5c30d01..cfd2b503c49 100644 --- a/.github/actions/spelling/allow/apis.txt +++ b/.github/actions/spelling/allow/apis.txt @@ -75,6 +75,7 @@ IConnection ICustom IDialog IDirect +Idn IExplorer IFACEMETHOD IFile diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 496ffcf079f..4cd4436459a 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -221,6 +221,7 @@ cmt cmw cmyk CNL +cnn cnt CNTRL Codeflow @@ -260,7 +261,6 @@ condrv conechokey conemu configurability -confusables conhost conime conimeinfo @@ -813,7 +813,6 @@ HIBYTE hicon HIDEWINDOW hinst -Hirots HISTORYBUFS HISTORYNODUP HISTORYSIZE @@ -830,6 +829,8 @@ HMK hmod hmodule hmon +homeglyphs +homoglyph HORZ hostable hostlib @@ -1287,7 +1288,6 @@ nullability nullness nullonfailure nullopts -NULs numlock numpad NUMSCROLL @@ -1776,7 +1776,6 @@ somefile SOURCEBRANCH sourced spammy -spand SRCCODEPAGE SRCCOPY SRCINVERT @@ -2302,7 +2301,6 @@ xunit xutr XVIRTUALSCREEN XWalk -xwwyzz xxyyzz yact YCast diff --git a/src/cascadia/TerminalControl/Resources/en-US/Resources.resw b/src/cascadia/TerminalControl/Resources/en-US/Resources.resw index 4d95586e120..c94ab15d368 100644 --- a/src/cascadia/TerminalControl/Resources/en-US/Resources.resw +++ b/src/cascadia/TerminalControl/Resources/en-US/Resources.resw @@ -178,6 +178,10 @@ Ctrl+Click to follow link + + Invalid URI + Whenever we encounter an invalid URI or URL we show this string as a warning. + Unable to find the selected font "{0}". diff --git a/src/cascadia/TerminalControl/TermControl.cpp b/src/cascadia/TerminalControl/TermControl.cpp index 715939db7d7..ba19dfb3ca3 100644 --- a/src/cascadia/TerminalControl/TermControl.cpp +++ b/src/cascadia/TerminalControl/TermControl.cpp @@ -3042,56 +3042,85 @@ namespace winrt::Microsoft::Terminal::Control::implementation _core.ClearHoveredCell(); } - winrt::fire_and_forget TermControl::_hoveredHyperlinkChanged(IInspectable /*sender*/, - IInspectable /*args*/) + // Attackers abuse Unicode characters that happen to look similar to ASCII characters. Cyrillic for instance has + // its own glyphs for а, с, е, о, р, х, and у that look practically identical to their ASCII counterparts. + // This is called a "IDN homoglyph attack". + // + // But outright showing Punycode URIs only is similarly flawed as they can end up looking similar to valid ASCII URIs. + // xn--cnn.com for instance looks confusingly similar to cnn.com, but actually represents U+407E. + // + // An optimal solution would detect any URI that contains homoglyphs and show them in their Punycode form. + // Such a detector however is not quite trivial and requires constant maintenance, which this project's + // maintainers aren't currently well equipped to handle. As such we do the next best thing and show the + // Punycode encoding side-by-side with the Unicode string for any IDN. + static winrt::hstring sanitizeURI(winrt::hstring uri) { - auto weakThis{ get_weak() }; - co_await wil::resume_foreground(Dispatcher()); - if (auto self{ weakThis.get() }) + if (uri.empty()) { - auto lastHoveredCell = _core.HoveredCell(); - if (lastHoveredCell) - { - winrt::hstring uriText = _core.HoveredUriText(); - if (uriText.empty()) - { - co_return; - } + return uri; + } - try - { - // DisplayUri will filter out non-printable characters and confusables. - Windows::Foundation::Uri parsedUri{ uriText }; - if (!parsedUri) - { - co_return; - } - uriText = parsedUri.DisplayUri(); + wchar_t punycodeBuffer[256]; + wchar_t unicodeBuffer[256]; - const auto panel = SwapChainPanel(); - const auto scale = panel.CompositionScaleX(); - const auto offset = panel.ActualOffset(); + // These functions return int, but are documented to only return positive numbers. + // Better make sure though. It allows us to pass punycodeLength right into IdnToUnicode. + const auto punycodeLength = std::max(0, IdnToAscii(0, uri.data(), gsl::narrow(uri.size()), &punycodeBuffer[0], 256)); + const auto unicodeLength = std::max(0, IdnToUnicode(0, &punycodeBuffer[0], punycodeLength, &unicodeBuffer[0], 256)); - // Update the tooltip with the URI - HoveredUri().Text(uriText); + if (punycodeLength <= 0 || unicodeLength <= 0) + { + return RS_(L"InvalidUri"); + } - // Set the border thickness so it covers the entire cell - const auto charSizeInPixels = CharacterDimensions(); - const auto htInDips = charSizeInPixels.Height / scale; - const auto wtInDips = charSizeInPixels.Width / scale; - const Thickness newThickness{ wtInDips, htInDips, 0, 0 }; - HyperlinkTooltipBorder().BorderThickness(newThickness); + const std::wstring_view punycode{ &punycodeBuffer[0], gsl::narrow_cast(punycodeLength) }; + const std::wstring_view unicode{ &unicodeBuffer[0], gsl::narrow_cast(unicodeLength) }; - // Compute the location of the top left corner of the cell in DIPS - const til::point locationInDIPs{ _toPosInDips(lastHoveredCell.Value()) }; + // IdnToAscii/IdnToUnicode return the input string as is if it's all + // plain ASCII. But we don't know if the input URI is Punycode or not. + // --> It's non-Punycode and ASCII if it round-trips. + if (uri == punycode && uri == unicode) + { + return uri; + } - // Move the border to the top left corner of the cell - OverlayCanvas().SetLeft(HyperlinkTooltipBorder(), locationInDIPs.x - offset.x); - OverlayCanvas().SetTop(HyperlinkTooltipBorder(), locationInDIPs.y - offset.y); - } - CATCH_LOG(); - } + return winrt::hstring{ fmt::format(FMT_COMPILE(L"{}\n({})"), punycode, unicode) }; + } + + void TermControl::_hoveredHyperlinkChanged(const IInspectable& /*sender*/, const IInspectable& /*args*/) + { + const auto lastHoveredCell = _core.HoveredCell(); + if (!lastHoveredCell) + { + return; } + + const auto uriText = sanitizeURI(_core.HoveredUriText()); + if (uriText.empty()) + { + return; + } + + const auto panel = SwapChainPanel(); + const auto scale = panel.CompositionScaleX(); + const auto offset = panel.ActualOffset(); + + // Update the tooltip with the URI + HoveredUri().Text(uriText); + + // Set the border thickness so it covers the entire cell + const auto charSizeInPixels = CharacterDimensions(); + const auto htInDips = charSizeInPixels.Height / scale; + const auto wtInDips = charSizeInPixels.Width / scale; + const Thickness newThickness{ wtInDips, htInDips, 0, 0 }; + HyperlinkTooltipBorder().BorderThickness(newThickness); + + // Compute the location of the top left corner of the cell in DIPS + const til::point locationInDIPs{ _toPosInDips(lastHoveredCell.Value()) }; + + // Move the border to the top left corner of the cell + OverlayCanvas().SetLeft(HyperlinkTooltipBorder(), locationInDIPs.x - offset.x); + OverlayCanvas().SetTop(HyperlinkTooltipBorder(), locationInDIPs.y - offset.y); } winrt::fire_and_forget TermControl::_updateSelectionMarkers(IInspectable /*sender*/, Control::UpdateSelectionMarkersEventArgs args) diff --git a/src/cascadia/TerminalControl/TermControl.h b/src/cascadia/TerminalControl/TermControl.h index d0844119231..334c9b2ddc1 100644 --- a/src/cascadia/TerminalControl/TermControl.h +++ b/src/cascadia/TerminalControl/TermControl.h @@ -338,7 +338,7 @@ namespace winrt::Microsoft::Terminal::Control::implementation void _CurrentCursorPositionHandler(const IInspectable& sender, const CursorPositionEventArgs& eventArgs); void _FontInfoHandler(const IInspectable& sender, const FontInfoEventArgs& eventArgs); - winrt::fire_and_forget _hoveredHyperlinkChanged(IInspectable sender, IInspectable args); + void _hoveredHyperlinkChanged(const IInspectable& sender, const IInspectable& args); winrt::fire_and_forget _updateSelectionMarkers(IInspectable sender, Control::UpdateSelectionMarkersEventArgs args); void _coreFontSizeChanged(const int fontWidth, From 2003bded28e0ee05491555894f5ba0c93eb63c01 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 9 Jun 2023 01:17:28 +0200 Subject: [PATCH 2/2] Update src/cascadia/TerminalControl/TermControl.cpp Co-authored-by: Carlos Zamora --- src/cascadia/TerminalControl/TermControl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cascadia/TerminalControl/TermControl.cpp b/src/cascadia/TerminalControl/TermControl.cpp index ba19dfb3ca3..1c1fc03da54 100644 --- a/src/cascadia/TerminalControl/TermControl.cpp +++ b/src/cascadia/TerminalControl/TermControl.cpp @@ -3044,7 +3044,7 @@ namespace winrt::Microsoft::Terminal::Control::implementation // Attackers abuse Unicode characters that happen to look similar to ASCII characters. Cyrillic for instance has // its own glyphs for а, с, е, о, р, х, and у that look practically identical to their ASCII counterparts. - // This is called a "IDN homoglyph attack". + // This is called an "IDN homoglyph attack". // // But outright showing Punycode URIs only is similarly flawed as they can end up looking similar to valid ASCII URIs. // xn--cnn.com for instance looks confusingly similar to cnn.com, but actually represents U+407E.