diff --git a/.travis.yml b/.travis.yml index 844b19f4..3602a983 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,9 +3,7 @@ env: - REQUIREMENTS=true - REQUIREMENTS=false python: - - "2.6" - "2.7" - - "3.2" - "3.3" - "3.4" - "3.5" @@ -14,7 +12,6 @@ python: # command to install dependencies install: - if [[ $REQUIREMENTS == true ]] ; then pip install -r requirements.txt ; fi - - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]] ; then pip install coverage==3.7.1; fi - pip install coveralls pytest pytest-cov coverage codecov - pip install -e . - if [[ ! $TRAVIS_PYTHON_VERSION == 'pypy-5.4' ]] ; then pip install regex; fi diff --git a/CHANGELOG.textile b/CHANGELOG.textile index 5047e1af..c998dd02 100644 --- a/CHANGELOG.textile +++ b/CHANGELOG.textile @@ -1,5 +1,13 @@ h1. Textile Changelog +h2. Version 3.0.0 +* Drop support for Python 2.6 and 3.2. +* Update to the current version of html5lib +* Bugfixes: +** Fix handling of HTML entities in extended pre blocks. ("#55":https://github.com/textile/python-textile/issues/55) +** Empty definitions in definition lists raised an exception ("#56":https://github.com/textile/python-textile/issues/56) +** Fix handling of unicode in img attributes ("#58":https://github.com/textile/python-textile/issues/58) + h2. Version 2.3.16 * Bugfixes: ** Fix processing of extended code blocks ("#50":https://github.com/textile/python-textile/issues/50) diff --git a/README.textile b/README.textile index a363721b..56155508 100644 --- a/README.textile +++ b/README.textile @@ -39,4 +39,4 @@ bc.. import textile h3. Notes: -* Active development supports Python 2.6 or later (including Python 3.2+). +* Active development supports Python 2.7 or later (including Python 3.3+). diff --git a/requirements.txt b/requirements.txt index 5cfb4428..a477d4b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ -html5lib==0.999 -Pillow==3.0.0 \ No newline at end of file +html5lib>=1.0b10 +Pillow==3.0.0 +regex diff --git a/setup.py b/setup.py index c12d3e00..e2f49baf 100644 --- a/setup.py +++ b/setup.py @@ -55,10 +55,15 @@ def get_version(): 'Programming Language :: Python :: 3.6', ], keywords='textile,text,html markup', - install_requires=['six',], + install_requires=[ + 'six', + 'html5lib>=0.999999999', + ], extras_require={ ':python_version=="2.6"': ['ordereddict>=1.1'], - 'develop': ['regex', 'pytest', 'pytest-cov'], + 'develop': ['pytest', 'pytest-cov'], + 'imagesize': ['Pillow>=3.0.0'], + 'regex': ['regex'], }, entry_points={'console_scripts': ['pytextile=textile.__main__:main']}, setup_requires=['pytest-runner'], diff --git a/tests/fixtures/README.txt b/tests/fixtures/README.txt index 426c9aa4..ba867308 100644 --- a/tests/fixtures/README.txt +++ b/tests/fixtures/README.txt @@ -42,5 +42,5 @@

Notes:

\ No newline at end of file diff --git a/tests/test_github_issues.py b/tests/test_github_issues.py index 0aeba4d8..27befff7 100644 --- a/tests/test_github_issues.py +++ b/tests/test_github_issues.py @@ -86,6 +86,11 @@ def test_github_issue_30(): expect = '\t

Tëxtíle

' assert result == expect + text ='!http://lala.com/lol.gif(♡ imáges)!' + result = textile.textile(text) + expect = '\t

♡ imáges

' + assert result == expect + def test_github_issue_36(): text = '"Chögyam Trungpa":https://www.google.com/search?q=Chögyam+Trungpa' result = textile.textile(text) @@ -199,3 +204,61 @@ def test_github_issue_52(): '\n\t\t\tSecond Header ' '\n\t\t\n\t') assert result == expect + +def test_github_issue_55(): + """Incorrect handling of quote entities in extended pre block""" + test = ('pre.. this is the first line\n\nbut "quotes" in an extended pre ' + 'block need to be handled properly.') + result = textile.textile(test) + expect = ('
this is the first line\n\nbut "quotes" in an '
+              'extended pre block need to be handled properly.
') + assert result == expect + + # supplied input + test = ('pre.. import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;' + '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.' + 'RescheduleTask;\n\nimport java.util.concurrent.' + 'ScheduledExecutorService;\nimport java.util.concurrent.TimeUnit;' + '\n\n/**\n* @author ustits\n*/\npublic abstract class ' + 'MainService extends RescheduleTask implements Context {\n\n' + 'private static final Logger log = LoggerFactory.getLogger(' + 'MainService.class);\nprivate final ScheduledExecutorService ' + 'scheduler;\n\nprivate boolean isFirstRun = true;\nprivate T ' + 'configs;\n\npublic MainService(final ScheduledExecutorService ' + 'scheduler) {\nsuper(scheduler);\nthis.scheduler = scheduler;\n}\n' + '\n@Override\npublic void setConfig(final T configs) {\nthis.' + 'configs = configs;\nif (isFirstRun) {\nscheduler.schedule(this, ' + '0, TimeUnit.SECONDS);\nisFirstRun = false;\n}\n}\n\n@Override\n' + 'public void stop() {\nsuper.stop();\nscheduler.shutdown();\ntry {' + '\nscheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} ' + 'catch (InterruptedException ie) {\nlog.warn("Unable to wait for ' + 'syncs termination", ie);\nThread.currentThread().interrupt();\n}' + '\n}\n\nprotected final T getConfigs() {\nreturn configs;\n}\n}') + result = textile.textile(test) + expect = ('
import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;'
+              '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.'
+              'RescheduleTask;\n\nimport java.util.concurrent.'
+              'ScheduledExecutorService;\nimport java.util.concurrent.'
+              'TimeUnit;\n\n/**\n* @author ustits\n*/\npublic abstract class '
+              'MainService<T> extends RescheduleTask implements '
+              'Context<T> {\n\nprivate static final Logger log = '
+              'LoggerFactory.getLogger(MainService.class);\nprivate final '
+              'ScheduledExecutorService scheduler;\n\nprivate boolean '
+              'isFirstRun = true;\nprivate T configs;\n\npublic MainService('
+              'final ScheduledExecutorService scheduler) {\nsuper(scheduler);'
+              '\nthis.scheduler = scheduler;\n}\n\n@Override\npublic void '
+              'setConfig(final T configs) {\nthis.configs = configs;\nif ('
+              'isFirstRun) {\nscheduler.schedule(this, 0, TimeUnit.SECONDS);'
+              '\nisFirstRun = false;\n}\n}\n\n@Override\npublic void stop() {'
+              '\nsuper.stop();\nscheduler.shutdown();\ntry {\nscheduler.'
+              'awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} catch '
+              '(InterruptedException ie) {\nlog.warn("Unable to wait '
+              'for syncs termination", ie);\nThread.currentThread().'
+              'interrupt();\n}\n}\n\nprotected final T getConfigs() {\n'
+              'return configs;\n}\n}
') + assert result == expect + +def test_issue_56(): + result = textile.textile("- :=\n-") + expect = '
\n
' + assert result == expect diff --git a/tests/test_textile.py b/tests/test_textile.py index dd069fb8..86a7d853 100644 --- a/tests/test_textile.py +++ b/tests/test_textile.py @@ -67,22 +67,18 @@ def test_autolinking(): def test_sanitize(): test = "a paragraph of benign text" result = "\t

a paragraph of benign text

" - try: - expect = textile.Textile().parse(test, sanitize=True) - assert result == expect - - test = """

a paragraph of evil text

""" - result = '

a paragraph of evil text

' - expect = textile.Textile().parse(test, sanitize=True) - assert result == expect - - test = """

a paragraph of benign text
and more text

""" - result = '

a paragraph of benign text
\nand more text

' - expect = textile.Textile(html_type='html5').parse(test, sanitize=True) - assert result == expect - except Exception as e: - message = '{0}'.format(e) - assert "html5lib not available" in message + expect = textile.Textile().parse(test, sanitize=True) + assert result == expect + + test = """

a paragraph of evil text

""" + result = '

a paragraph of evil text

' + expect = textile.Textile().parse(test, sanitize=True) + assert result == expect + + test = """

a paragraph of benign text
and more text

""" + result = '

a paragraph of benign text
\nand more text

' + expect = textile.Textile(html_type='html5').parse(test, sanitize=True) + assert result == expect def test_imagesize(): PIL = pytest.importorskip('PIL') diff --git a/textile/__init__.py b/textile/__init__.py index c019f413..bb7829f7 100644 --- a/textile/__init__.py +++ b/textile/__init__.py @@ -9,12 +9,3 @@ __all__ = ['textile', 'textile_restricted'] __version__ = VERSION - - -if sys.version_info[:2] == (2, 6): - warnings.warn( - "Python 2.6 is no longer supported by the Python core team, please " - "upgrade your Python. A future version of textile will drop support " - "for Python 2.6", - DeprecationWarning - ) diff --git a/textile/core.py b/textile/core.py index efb1c4dc..7572a468 100644 --- a/textile/core.py +++ b/textile/core.py @@ -32,10 +32,7 @@ from textile.objects import Block, Table -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict +from collections import OrderedDict try: @@ -506,7 +503,10 @@ def block(self, text): block.outer_atts) line = "\t{0}".format(line) else: - line = self.graf(line) + if block.tag == 'pre': + line = self.shelve(encode_html(line, quotes=True)) + else: + line = self.graf(line) line = self.doPBr(line) line = line.replace('
', '
') @@ -1184,6 +1184,8 @@ def fRCList(self, match): # parse the attributes and content m = re.match(r'^[-]+({0})[ .](.*)$'.format(cls_re_s), line, flags=re.M | re.S) + if not m: + continue atts, content = m.groups() # cleanup diff --git a/textile/objects/block.py b/textile/objects/block.py index 5c613211..7002ecaf 100644 --- a/textile/objects/block.py +++ b/textile/objects/block.py @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict +from collections import OrderedDict try: import regex as re except ImportError: diff --git a/textile/regex_strings.py b/textile/regex_strings.py index a1520726..f7c6f127 100644 --- a/textile/regex_strings.py +++ b/textile/regex_strings.py @@ -19,7 +19,6 @@ 'char': r'(?:[^\p{Zs}\v])', } except ImportError: - import re from sys import maxunicode upper_re_s = "".join( [six.unichr(c) for c in six.moves.range(maxunicode) if six.unichr( diff --git a/textile/tools/sanitizer.py b/textile/tools/sanitizer.py index 4fc8fb2c..3c7209c6 100644 --- a/textile/tools/sanitizer.py +++ b/textile/tools/sanitizer.py @@ -3,18 +3,9 @@ def sanitize(string): Ensure that the text does not contain any malicious HTML code which might break the page. """ - try: - import html5lib - from html5lib import sanitizer, serializer, treewalkers - except ImportError: - raise Exception("html5lib not available") + from html5lib import parseFragment, serialize - p = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer) - tree = p.parseFragment(string) - - walker = treewalkers.getTreeWalker("etree") - stream = walker(tree) - - s = serializer.htmlserializer.HTMLSerializer(omit_optional_tags=False, - quote_attr_values=True) - return s.render(stream) + parsed = parseFragment(string) + clean = serialize(parsed, sanitize=True, omit_optional_tags=False, + quote_attr_values='always') + return clean diff --git a/textile/utils.py b/textile/utils.py index c9750245..fa21f058 100644 --- a/textile/utils.py +++ b/textile/utils.py @@ -10,10 +10,7 @@ urlparse = urllib.parse.urlparse HTMLParser = html_parser.HTMLParser -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict +from collections import OrderedDict from xml.etree import ElementTree @@ -50,6 +47,8 @@ def generate_tag(tag, content, attributes=None): content are strings, the attributes argument is a dictionary. As a convenience, if the content is ' /', a self-closing tag is generated.""" content = six.text_type(content) + # In PY2, ElementTree tostringlist only works with bytes, not with + # unicode(). enc = 'unicode' if six.PY2: enc = 'UTF-8' @@ -60,21 +59,12 @@ def generate_tag(tag, content, attributes=None): # adding text by assigning it to element_tag.text. That results in # non-ascii text being html-entity encoded. Not bad, but not entirely # matching php-textile either. - try: - element_tag = ElementTree.tostringlist(element, encoding=enc, - method='html') - element_tag.insert(len(element_tag) - 1, content) - element_text = ''.join(element_tag) - except AttributeError: - # Python 2.6 doesn't have the tostringlist method, so we have to treat - # it differently. - element_tag = ElementTree.tostring(element, encoding=enc) - element_text = re.sub(r"<\?xml version='1.0' encoding='UTF-8'\?>\n", - '', element_tag) - if content != six.text_type(' /'): - element_text = element_text.rstrip(' />') - element_text = six.text_type('{0}>{1}').format(six.text_type( - element_text), content, tag) + element_tag = ElementTree.tostringlist(element, encoding=enc, + method='html') + if six.PY2: + element_tag = [v.decode(enc) for v in element_tag] + element_tag.insert(len(element_tag) - 1, content) + element_text = ''.join(element_tag) return element_text def has_raw_text(text): diff --git a/textile/version.py b/textile/version.py index beb7a8e4..aaa42644 100644 --- a/textile/version.py +++ b/textile/version.py @@ -1 +1 @@ -VERSION = '2.3.16' +VERSION = '3.0.0'