diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java index e437cd9813..ae461e92ef 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java @@ -632,7 +632,9 @@ private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) { break; default: // todo - bring scan groups in if desired - if (inSorted(name, Constants.InBodyStartEmptyFormatters)) { + if (!Tag.isKnownTag(name)) { // no special rules for custom tags + tb.insert(startTag); + } else if (inSorted(name, Constants.InBodyStartEmptyFormatters)) { tb.reconstructFormattingElements(); tb.insertEmpty(startTag); tb.framesetOk(false); @@ -658,8 +660,7 @@ private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) { tb.error(this); return false; } else { - if (Tag.isKnownTag(name)) // don't reconstruct for custom elements - tb.reconstructFormattingElements(); + tb.reconstructFormattingElements(); tb.insert(startTag); } } diff --git a/src/main/java/org/jsoup/parser/Tag.java b/src/main/java/org/jsoup/parser/Tag.java index 1f43ead474..d573033a56 100644 --- a/src/main/java/org/jsoup/parser/Tag.java +++ b/src/main/java/org/jsoup/parser/Tag.java @@ -237,7 +237,8 @@ protected Tag clone() { "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins", "del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th", "td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main", - "svg", "math", "center", "template" + "svg", "math", "center", "template", + "dir", "applet", "marquee", "listing" // deprecated but still known / special handling }; private static final String[] inlineTags = { "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd", @@ -245,7 +246,7 @@ protected Tag clone() { "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup", "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track", "summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track", - "data", "bdi", "s" + "data", "bdi", "s", "strike", "nobr" }; private static final String[] emptyTags = { "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command", diff --git a/src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java b/src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java index 29482c8428..5257975a11 100644 --- a/src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java +++ b/src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java @@ -1,6 +1,7 @@ package org.jsoup.parser; import org.jsoup.Jsoup; +import org.jsoup.internal.StringUtil; import org.jsoup.parser.HtmlTreeBuilderState.Constants; import org.junit.jupiter.api.Test; @@ -10,8 +11,8 @@ import java.util.Arrays; import java.util.List; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.jsoup.parser.HtmlTreeBuilderState.Constants.InBodyStartInputAttribs; +import static org.junit.jupiter.api.Assertions.*; public class HtmlTreeBuilderStateTest { static List findConstantArrays(Class aClass) { @@ -47,6 +48,18 @@ public void ensureArraysAreSorted() { assertEquals(40, constants.size()); } + @Test public void ensureTagSearchesAreKnownTags() { + List constants = findConstantArrays(Constants.class); + for (Object[] constant : constants) { + String[] tagNames = (String[]) constant; + for (String tagName : tagNames) { + if (StringUtil.inSorted(tagName, InBodyStartInputAttribs)) + continue; // odd one out in the constant + assertTrue(Tag.isKnownTag(tagName), String.format("Unknown tag name: %s", tagName)); + } + } + } + @Test public void nestedAnchorElements01() {