Skip to content

Commit

Permalink
Correctly parse CDATA sections
Browse files Browse the repository at this point in the history
  • Loading branch information
gettalong committed Nov 18, 2024
1 parent b970b40 commit e5a9d64
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 5 deletions.
9 changes: 6 additions & 3 deletions lib/kramdown/converter/kramdown.rb
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,17 @@ def convert_blank(_el, _opts)
ESCAPED_CHAR_RE = /(\$\$|[\\*_`\[\]{"'|])|^ {0,3}(:)/

def convert_text(el, opts)
if opts[:raw_text]
if opts[:raw_text] || (@stack.last.type == :html_element && @stack.last.options[:content_model] == :raw)
el.value
else
el.value.gsub(/\A\n/) do
result = el.value.gsub(/\A\n/) do
opts[:prev] && opts[:prev].type == :br ? '' : "\n"
end.gsub(/\s+/, ' ').gsub(ESCAPED_CHAR_RE) do
end
result.gsub!(/\s+/, ' ') unless el.options[:cdata]
result.gsub!(ESCAPED_CHAR_RE) do
$1 || !opts[:prev] || opts[:prev].type == :br ? "\\#{$1 || $2}" : $&
end
result
end
end

Expand Down
5 changes: 4 additions & 1 deletion lib/kramdown/parser/html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ module Constants
HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/im
HTML_COMMENT_RE = /<!--(.*?)-->/m
HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
HTML_CDATA_RE = /<!\[CDATA\[(.*?)\]\]>/m
HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})(?:\s*=\s*(?:(\p{Word}+)|("|')(.*?)\3))?/m
HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}(?:\s*=\s*(?:\p{Word}+|("|').*?\3))?)*)\s*(\/)?>/m
HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::UNAME_STR})\s*>/m
Expand Down Expand Up @@ -136,7 +137,7 @@ def handle_raw_html_tag(name)
end
end

HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?|!\[CDATA\[))/ # :nodoc:

# Parse raw HTML from the current source position, storing the found elements in +el+.
# Parsing continues until one of the following criteria is fulfilled:
Expand All @@ -160,6 +161,8 @@ def parse_raw_html(el, &block)
@tree.children << Element.new(:xml_comment, result, nil, category: :block, location: line)
elsif (result = @src.scan(HTML_INSTRUCTION_RE))
@tree.children << Element.new(:xml_pi, result, nil, category: :block, location: line)
elsif @src.scan(HTML_CDATA_RE)
@tree.children << Element.new(:text, @src[1], nil, cdata: true, location: line)
elsif @src.scan(HTML_TAG_RE)
if method(:handle_html_start_tag).arity.abs >= 1
handle_html_start_tag(line, &block)
Expand Down
5 changes: 4 additions & 1 deletion lib/kramdown/parser/kramdown/html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ module Parser
class Kramdown

include Kramdown::Parser::Html::Parser
include Kramdown::Utils::Html

# Mapping of markdown attribute value to content model. I.e. :raw when "0", :default when "1"
# (use default content model for the HTML element), :span when "span", :block when block and
Expand Down Expand Up @@ -95,13 +96,15 @@ def parse_block_html
end
define_parser(:block_html, HTML_BLOCK_START)

HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|!--|\/)/
HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|!--|\/|!\[CDATA\[)/

# Parse the HTML at the current position as span-level HTML.
def parse_span_html
line = @src.current_line_number
if (result = @src.scan(HTML_COMMENT_RE))
@tree.children << Element.new(:xml_comment, result, nil, category: :span, location: line)
elsif @src.scan(HTML_CDATA_RE)
add_text(escape_html(@src[1]))
elsif (result = @src.scan(HTML_TAG_CLOSE_RE))
warning("Found invalidly used HTML closing tag for '#{@src[1]}' on line #{line}")
add_text(result)
Expand Down
10 changes: 10 additions & 0 deletions test/testcases/block/09_html/cdata_section.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<figure>
+-----------------------+
| Use XML, be Happy :-) |
|_______________________|

</figure>

<p>Inline section &lt;goes&gt; here.</p>

<p>Inline <kbd>test *case* *section* &lt;goes&gt; here</kbd> end.</p>
10 changes: 10 additions & 0 deletions test/testcases/block/09_html/cdata_section.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<figure><![CDATA[
+-----------------------+
| Use XML, be Happy :-) |
|_______________________|
]]>
</figure>

Inline <![CDATA[section <goes>]]> here.

Inline <kbd>test *case* <![CDATA[*section* <goes>]]> here</kbd> end.

0 comments on commit e5a9d64

Please sign in to comment.