From 40e5db45979cee115e023bd860cbce7473a41511 Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Tue, 9 Apr 2024 22:42:51 +1000 Subject: [PATCH] debug handling of Word HTML namespaces: https://github.com/metanorma/metanorma/issues/363 --- html2doc.gemspec | 4 ++-- lib/html2doc/mime.rb | 2 +- spec/html2doc_spec.rb | 28 ++++++++++++++-------------- spec/spec_helper.rb | 4 +++- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/html2doc.gemspec b/html2doc.gemspec index dcb7c9d..1eef5b0 100644 --- a/html2doc.gemspec +++ b/html2doc.gemspec @@ -31,7 +31,7 @@ Gem::Specification.new do |spec| spec.add_dependency "mime-types" spec.add_dependency "nokogiri", "~> 1.15" spec.add_dependency "plane1converter", "~> 0.0.1" - spec.add_dependency "plurimath", "~> 0.7.0" + spec.add_dependency "plurimath", "~> 0.8.0" spec.add_dependency "thread_safe" spec.add_dependency "uuidtools" spec.add_dependency "unitsml" @@ -43,7 +43,7 @@ Gem::Specification.new do |spec| spec.add_development_dependency "guard-rspec", "~> 4.7" spec.add_development_dependency "rake", "~> 12.0" spec.add_development_dependency "rspec", "~> 3.6" - spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3" + spec.add_development_dependency "rspec-match_fuzzy", "~> 0.2.0" spec.add_development_dependency "rubocop", "~> 1.5.2" spec.add_development_dependency "simplecov", "~> 0.15" spec.add_development_dependency "timecop", "~> 0.9" diff --git a/lib/html2doc/mime.rb b/lib/html2doc/mime.rb index 64d70e9..5a15af3 100644 --- a/lib/html2doc/mime.rb +++ b/lib/html2doc/mime.rb @@ -124,7 +124,7 @@ def rename_image(img, dir, localdir) def skip_image_cleanup?(img) src = img["src"] - (img.element? && %w(img v:imagedata).include?(img.name)) or return true + (img.element? && %w(img imagedata).include?(img.name)) or return true (src.nil? || src.empty? || /^http/.match?(src) || %r{^data:(image|application)/[^;]+;base64}.match?(src)) and return true false diff --git a/spec/html2doc_spec.rb b/spec/html2doc_spec.rb index 07e2468..67c92c1 100644 --- a/spec/html2doc_spec.rb +++ b/spec/html2doc_spec.rb @@ -2,7 +2,7 @@ def html_input(xml) <<~HTML - blank + blank @@ -13,7 +13,7 @@ def html_input(xml) def html_input_no_title(xml) <<~HTML - + @@ -24,7 +24,7 @@ def html_input_no_title(xml) def html_input_empty_head(xml) <<~HTML - + #{xml} @@ -47,7 +47,7 @@ def mock_plurimath_error Content-Type: text/html; charset="utf-8" - + Print @@ -476,16 +476,16 @@ def image_clean(xml) ])) - expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) - .to match_fuzzy(<<~OUTPUT) - #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} - #{word_body(%{ -
- - log2puBB𝔹𝔹𝔹𝒞𝒞𝓑𝓒𝓒𝚃𝚃𝔉ℜ𝕭𝕱𝕽𝖲𝖥𝗕𝗦𝗙𝝰𝘚𝘍𝘐𝙎𝙁𝘽𝙄𝞪BIIII - -
}, '
')} - #{WORD_FTR1} + doc = File.read("test.doc", encoding: "utf-8") + .sub(%r{^.*}m, "") + .sub(%r{.*$}m, "") + expect(doc) + .to be_equivalent_to(<<~OUTPUT) + + + log2puBB𝔹𝔹𝔹𝒞𝒞𝓑𝓒𝓒𝚃𝚃𝔉ℜ𝕭𝕱𝕽𝖲𝖥𝗕𝗦𝗙𝝰𝘚𝘍𝘐𝙎𝙁𝘽𝙄𝞪BIIII + + OUTPUT end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 774f022..390fdc5 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -4,8 +4,10 @@ end require "bundler/setup" -require 'rspec/match_fuzzy' +require "rspec/match_fuzzy" require "html2doc" +require "rspec/matchers" +require "equivalent-xml" RSpec.configure do |config| # Enable flags like --only-failures and --next-failure