Skip to content

Commit

Permalink
Match abbreviations across line breaks and other whitespace
Browse files: browse the repository at this point in the history
  • Loading branch information
gettalong committed Nov 17, 2024
1 parent 6f53282 commit 9ac9402
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 1 deletion.
6 changes: 6 additions & 0 deletions lib/kramdown/parser/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ def initialize(source, options)
@options = Kramdown::Options.merge(options)
@root = Element.new(:root, nil, nil, encoding: (source.encoding rescue nil), location: 1,
options: {}, abbrev_defs: {}, abbrev_attr: {})

@root.options[:abbrev_defs].default_proc = @root.options[:abbrev_attr].default_proc =
lambda do |h, k|
k_mod = k.gsub(/[\s\p{Z}]+/, " ")
k != k_mod ? h[k_mod] : nil
end
@warnings = []
@text_type = :text
end
Expand Down
4 changes: 3 additions & 1 deletion lib/kramdown/parser/kramdown/abbreviation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ def replace_abbreviations(el, regexps = nil)
return if @root.options[:abbrev_defs].empty?
unless regexps
sorted_abbrevs = @root.options[:abbrev_defs].keys.sort {|a, b| b.length <=> a.length }
regexps = [Regexp.union(*sorted_abbrevs.map {|k| /#{Regexp.escape(k)}/ })]
regexps = [Regexp.union(*sorted_abbrevs.map do |k|
/#{Regexp.escape(k).gsub(/\\\s/, "[\\s\\p{Z}]+").force_encoding(Encoding::UTF_8)}/
end)]
regexps << /(?=(?:\W|^)#{regexps.first}(?!\w))/ # regexp should only match on word boundaries
end
el.children.map! do |child|
Expand Down
4 changes: 4 additions & 0 deletions test/test_files.rb
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,11 @@ def tidy_output(out)
define_method("test_whether_#{conv_class}_modifies_tree_with_file_#{text_file.tr('.', '_')}") do
doc = Kramdown::Document.new(File.read(text_file), options)
options_before = Marshal.load(Marshal.dump(doc.options))
abbrev_proc = doc.root.options[:abbrev_defs].default_proc
doc.root.options[:abbrev_defs].default_proc = doc.root.options[:abbrev_attr].default_proc = nil
tree_before = Marshal.load(Marshal.dump(doc.root))
doc.root.options[:abbrev_defs].default_proc = doc.root.options[:abbrev_attr].default_proc =
abbrev_proc
Kramdown::Converter.const_get(conv_class).convert(doc.root, doc.options)
assert_equal(options_before, doc.options)
assert_tree_not_changed(tree_before, doc.root)
Expand Down
4 changes: 4 additions & 0 deletions test/testcases/span/abbreviations/abbrev.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@

<p>This is <abbr class="testit test" title="Some text here">awesome</abbr>.</p>

<p>hello <abbr title="baz">foo bar</abbr> babble.</p>

<p>hello <abbr title="baz">foo
bar</abbr> babble.</p>
7 changes: 7 additions & 0 deletions test/testcases/span/abbreviations/abbrev.text
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,10 @@ This is awesome.
{:.testit}
*[awesome]: Some text here
{:.test}

*[foo bar]: baz

hello foo bar babble.

hello foo
bar babble.

0 comments on commit 9ac9402

Please sign in to comment.