From 6af3ed8ef5d14b4b17e9c1631e67a0f645da1da5 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 22 Feb 2019 14:56:35 -0500 Subject: [PATCH] Self-Test: Add html diffing (#635) Compare the html built by AsciiDoc and Asciidoctor as part of the self-test. Like all good tests, it doesn't pass at first. I've temporarily ignored all of the failures and have added them to the main asciidoctor issue or filed them as their own issue. --- Dockerfile | 17 ++++++++-- Makefile | 6 ++-- integtest/Makefile | 10 ++++++ integtest/html_diff | 81 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+), 5 deletions(-) create mode 100755 integtest/html_diff diff --git a/Dockerfile b/Dockerfile index 4f133a59b9a27..ca05909ce3e8e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,7 @@ LABEL MAINTAINERS="Nik Everett " # * openssh-client (used by git) # * openssh-server (used to forward ssh auth for git when running with --all on macOS) # * perl-base +# * python (is python2) # * xsltproc # * To install rubygems for asciidoctor # * build-essential @@ -23,7 +24,8 @@ LABEL MAINTAINERS="Nik Everett " # * ruby # * ruby-dev # * Used to check the docs build in CI -# * pycodestyle +# * python3 +# * python3-pip RUN install_packages \ bash \ build-essential \ @@ -38,8 +40,9 @@ RUN install_packages \ openssh-client \ openssh-server \ perl-base \ - pycodestyle \ python \ + python3 \ + python3-pip \ ruby \ ruby-dev \ unzip \ @@ -66,3 +69,13 @@ RUN gem install --no-document \ rubocop:0.64.0 \ rspec:3.8.0 \ thread_safe:0.3.6 + +# Wheel inventory: +# * Used to test the docs build +# * beautifulsoup4 +# * lxml +# * pycodestyle +RUN pip3 install \ + beautifulsoup4==4.7.1 \ + lxml==4.3.1 \ + pycodestyle==2.5.0 diff --git a/Makefile b/Makefile index 90c7b0f6ca826..142a31e97501e 100644 --- a/Makefile +++ b/Makefile @@ -8,10 +8,10 @@ MAKEFLAGS += --silent check: unit_test integration_test .PHONY: unit_test -unit_test: build_docs_check asciidoctor_check +unit_test: style asciidoctor_check -.PHONY: build_docs_check -build_docs_check: +.PHONY: style +style: build_docs pycodestyle build_docs .PHONY: asciidoctor_check diff --git a/integtest/Makefile b/integtest/Makefile index ac5cafcbeef81..f5f13183fc304 100644 --- a/integtest/Makefile +++ b/integtest/Makefile @@ -3,9 +3,14 @@ MAKEFLAGS += --silent .PHONY: check check: \ + style \ readme_expected_files readme_same_files \ includes_expected_files includes_same_files +.PHONY: style +style: html_diff + pycodestyle html_diff + define STANDARD_EXPECTED_FILES= [ -s $^/index.html ] [ -s $^/docs.js ] @@ -39,6 +44,11 @@ readme_expected_files: /tmp/readme_asciidoc | grep -v snippets/blocks \ ) \ <(cd /tmp/$*_asciidoctor && find * -type f | sort) + # The grep -v below are for known issues with asciidoctor + for file in $$(cd /tmp/$*_asciidoc && find * -type f -name '*.html' \ + | grep -v 'blocks\|changes\|experimental\|multi-part'); do \ + ./html_diff /tmp/$*_asciidoc/$$file /tmp/$*_asciidoctor/$$file; \ + done define BD= /docs_build/build_docs.pl --in_standard_docker --out $@ diff --git a/integtest/html_diff b/integtest/html_diff new file mode 100755 index 0000000000000..3f90e255a44be --- /dev/null +++ b/integtest/html_diff @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +# Script to compare two html files, ignoring differences that we consider +# to be unimportant. The output is a unified diff of formatted html meant +# to be readable and precise at identifying differences. +# +# This script is designed to be run in the container managed by the +# Dockerfile at the root of this repository. + + +from bs4 import BeautifulSoup, NavigableString +import difflib +import re + + +def normalize_html(html): + """Normalizes html to remove expected differences between AsciiDoc's + output and Asciidoctor's output. + """ + # Replace many whitespace characters with a single space in some elements + # kind of like a browser does. + soup = BeautifulSoup(html, 'lxml') + for e in soup.select(':not(script,pre,code,style)'): + for part in e: + if isinstance(part, NavigableString): + crunched = NavigableString(re.sub(r'\s+', ' ', part)) + if crunched != part: + part.replace_with(crunched) + # Format the html with indentation so we can *see* things + html = soup.prettify() + # Remove the zero width space that asciidoctor adds after each horizontal + # ellipsis. They don't hurt anything but asciidoc doesn't make them + html = html.replace('\u2026\u200b', '\u2026') + # Temporary workaround for known issues + html = html.replace('class="edit_me" href="/./', 'class="edit_me" href="') + html = re.sub( + r'(?m)^\s+
' + r'\s+
\n', '', html) + html = html.replace('\\<1>', '<1>') + return html + + +def html_diff(lhs_name, lhs, rhs_name, rhs): + """Compare two html blobs, ignoring expected differences between AsciiDoc + and Asciidoctor. The result is a generator for lines in the diff report. + If it is entirely empty then there is no diff. + """ + lhs_lines = normalize_html(lhs).splitlines() + rhs_lines = normalize_html(rhs).splitlines() + return difflib.unified_diff( + lhs_lines, + rhs_lines, + fromfile=lhs_name, + tofile=rhs_name, + lineterm='') + + +def html_file_diff(lhs, rhs): + """Compare two html files, ignoring expected differences between AsciiDoc + and Asciidoctor. The result is a generator for lines in the diff report. + If it is entirely empty then there is no diff. + """ + with open(lhs, encoding='utf-8') as lhs_file: + lhs_text = lhs_file.read() + with open(rhs, encoding='utf-8') as rhs_file: + rhs_text = rhs_file.read() + return html_diff(lhs, lhs_text, rhs, rhs_text) + + +if __name__ == '__main__': + import sys + if len(sys.argv) != 3: + print("Expected exactly 2 arguments but got %s" % sys.argv[1:]) + exit(1) + had_diff = False + for line in html_file_diff(sys.argv[1], sys.argv[2]): + had_diff = True + # print doesn't like to print utf-8 in all cases but buffer.write is ok + sys.stderr.buffer.write(line.encode('utf-8')) + sys.stderr.buffer.write("\n".encode('utf-8')) + exit(1 if had_diff else 0)