Skip to content

Commit

Permalink
Fix: enable Python 3.12 to parse templates (webpy#785)
Browse files Browse the repository at this point in the history
* Fix: enable Python 3.12 to parse templates

Closes webpy#784.

This PR fixes the inability of `web.py` to run on Python 3.12, and it
does so by matching any unmatched `"` in a string.

In Python 3.12, changes to `tokenize.generate_tokens()` require all
quotes to be matched, and the `web.py` template parser hands
unterminated strings to `tokenize.generate_tokens()` quite frequently.

This PR makes a tacit assumption that we should avoid rewriting the
template parser if possible while also allowing `web.py` to run on
Python 3.12.

To that end, it handles every `TokenError` that is because of unmatched
string literals by adding a `"` to the line. Although this appears to
work, I am aware that appending a `"` to each line that has an unmatched
`"` is less than ideal. However, it seemed a very easy way to avoid
rewriting the template parser. I am absolutely open to other approaches.

*IF* this approach looks as if it has legs, I can look more into
where that "extra" `"` goes, why it seems not to matter, and write some
unit tests.

With regard to testing, I tested this with as many pages on Open Library
that I could (via the local development environment), and it seems to
work.

* noqa: complexity and excess statements for read_expr()

* Add suggestions from @tfmorris.

* Switch to more-itertools.peekable

 and set linter limits back down again

* Fix off-by-1 error and add test coverage

* Linting fixes

* Add `more_itertools` as a dependency

`more_itertools.peekable()` is used now.

`cheroot` already requires `more_itertools`, so this dependency is
already installed anyway, but adding it as a dependency for `webpy`
itself will ensure that if `cheroot` drops the `more_itertools`
dependency, `webpy` will still require it.

---------

Co-authored-by: Tom Morris <tfmorris@gmail.com>
  • Loading branch information
scottbarnes and tfmorris authored Feb 21, 2024
1 parent 5709b1f commit d364932
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 48 deletions.
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,13 @@ show-source = true
target-version = "py38"

[tool.ruff.mccabe]
max-complexity = 26
max-complexity = 20

[tool.ruff.pylint]
allow-magic-value-types = ["int", "str"]
max-args = 9 # default is 5
max-branches = 17 # default is 12
max-returns = 8 # default is 6
max-statements = 51 # default is 50

[tool.codespell]
ignore-words-list = "asend,gae"
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
cheroot>=6.0.0
more_itertools>=2.6
multipart>=0.2.4
64 changes: 63 additions & 1 deletion tests/test_template.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import unittest

import web
from web.template import SecurityError, Template
from web.template import ExpressionNode, Parser, SecurityError, Template


class TestItem:
    """Minimal stand-in object exposing a fixed ``id`` attribute for template tests."""

    # Keep the test framework from collecting this helper as a test class.
    __test__ = False

    def __init__(self):
        # Fixed, predictable value so templates rendering $item.id are deterministic.
        self.id = 12345

class _TestResult:
Expand Down Expand Up @@ -43,6 +50,61 @@ def test_overridden(self):
f = t(tpl, globals={"print": lambda x: x})
assert repr(f()) == "'blah\\n'"

def test_quotes(self):
    """A $-expression interpolated inside a double-quoted attribute renders its value."""
    source = 'a="$foo" <p>'
    rendered = t(source, globals={"foo": "bar"})
    assert repr(rendered()) == "'a=\"bar\" <p>\\n'"

def test_accessor(self):
    """Dotted attribute access ($foo.id) is expanded inside a quoted value."""
    source = 'a="$foo.id"<p>'
    rendered = t(source, globals={"foo": TestItem()})
    assert repr(rendered()) == "'a=\"12345\"<p>\\n'"

def test_href(self):
    """An expression embedded in an href URL path renders its value."""
    source = '<a href="/del/$item.id">Delete</a>'
    rendered = t(source, globals={"item": TestItem()})
    assert repr(rendered()) == "'<a href=\"/del/12345\">Delete</a>\\n'"


class TestParser(unittest.TestCase):
    """Exercise Parser.read_expr() directly.

    The inputs below are the ``text`` values that ``Parser.read_node()``
    would hand to ``read_expr()`` while processing this template:

        $def with (back, docs)
        $var title: Index
        <p><a href="$back">&larr; Back to Index</a></p>
        <ul>
        $for path, title in docs:
            <li><a href="$path">$title</a></li>
        </ul>
    """

    def test_read_expr(self) -> None:
        """read_expr() yields an ExpressionNode plus the unconsumed remainder."""
        # (input text, expected repr of the node, expected leftover text)
        cases = [
            (
                'back">&larr; Back to Index</a></p>\n',
                "$back",
                '">&larr; Back to Index</a></p>\n',
            ),
            ('path">$title</a></li>\n', "$path", '">$title</a></li>\n'),
            ("title</a></li>\n", "$title", "</a></li>\n"),
        ]
        for text, expected_repr, remainder in cases:
            node, rest = Parser().read_expr(text)
            assert isinstance(node, ExpressionNode)
            assert repr(node) == expected_repr
            assert rest == remainder


class TestRender:
def test_template_without_ext(self, tmpdir):
Expand Down
3 changes: 2 additions & 1 deletion web/browser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Browser to test web applications.
(from web.py)
"""

import os
import webbrowser
from http.cookiejar import CookieJar
Expand Down Expand Up @@ -46,7 +47,7 @@ def reset(self):

def build_opener(self):
"""Builds the opener using (urllib2/urllib.request).build_opener.
Subclasses can override this function to prodive custom openers.
Subclasses can override this function to provide custom openers.
"""
return urllib_build_opener()

Expand Down
1 change: 1 addition & 0 deletions web/contrib/template.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Interface to various templating engines.
"""

import os.path

__all__ = ["render_cheetah", "render_genshi", "render_mako", "cache"]
Expand Down
1 change: 1 addition & 0 deletions web/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Database API
(part of web.py)
"""

import ast
import datetime
import os
Expand Down
1 change: 0 additions & 1 deletion web/net.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
(from web.py)
"""


import datetime
import re
import socket
Expand Down
94 changes: 51 additions & 43 deletions web/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,15 @@
import ast
import builtins
import glob
import itertools
import os
import sys
import token
import tokenize
from functools import partial

from more_itertools import peekable

from .net import websafe
from .utils import re_compile, safestr, safeunicode, storage
from .webapi import config
Expand Down Expand Up @@ -242,7 +246,7 @@ def read_keyword(self, text):
line, text = splitline(text)
return StatementNode(line.strip() + "\n"), text

def read_expr(self, text, escape=True):
def read_expr(self, text, escape=True): # noqa: C901, PLR0915
"""Reads a python expression from the text and returns the expression and remaining text.
expr -> simple_expr | paren_expr
Expand Down Expand Up @@ -271,10 +275,10 @@ def simple_expr():
extended_expr()

def identifier():
next(tokens)
return next(tokens)

def extended_expr():
lookahead = tokens.lookahead()
lookahead = tokens.peek()
if lookahead is None:
return
elif lookahead.value == ".":
Expand All @@ -288,7 +292,7 @@ def extended_expr():
def attr_access():
from token import NAME # python token constants

if tokens.lookahead2().type == NAME:
if tokens[1].type == NAME:
next(tokens) # consume dot
identifier()
extended_expr()
Expand All @@ -297,7 +301,7 @@ def paren_expr():
begin = next(tokens).value
end = parens[begin]
while True:
if tokens.lookahead().value in parens:
if tokens.peek().value in parens:
paren_expr()
else:
t = next(tokens)
Expand All @@ -306,57 +310,61 @@ def paren_expr():

parens = {"(": ")", "[": "]", "{": "}"}

def get_tokens(text):
def get_tokens(text: str):
"""tokenize text using python tokenizer.
Python tokenizer ignores spaces, but they might be important in some cases.
This function introduces dummy space tokens when it identifies any ignored space.
Each token is a storage object containing type, value, begin and end.
"""
i = iter([text])
readline = lambda: next(i)
end = None
for t in tokenize.generate_tokens(readline):
t = storage(type=t[0], value=t[1], begin=t[2], end=t[3])
if end is not None and end != t.begin:
_, x1 = end
_, x2 = t.begin
yield storage(type=-1, value=text[x1:x2], begin=end, end=t.begin)
end = t.end
yield t

class BetterIter:
"""Iterator like object with 2 support for 2 look aheads."""

def __init__(self, items):
self.iteritems = iter(items)
self.items = []
self.position = 0
self.current_item = None

def lookahead(self):
if len(self.items) <= self.position:
self.items.append(self._next())
return self.items[self.position]
def tokenize_text(input_text):
i = iter([input_text])
readline = lambda: next(i)
end = None
for t in tokenize.generate_tokens(readline):
t = storage(type=t[0], value=t[1], begin=t[2], end=t[3])
if end is not None and end != t.begin:
_, x1 = end
_, x2 = t.begin
yield storage(
type=-1, value=input_text[x1:x2], begin=end, end=t.begin
)
end = t.end
yield t

try:
yield from tokenize_text(text)
except tokenize.TokenError as e:
# Things like unterminated string literals or EOF in multi-line literals will raise exceptions
# tokenize the error free portion, then return an error token with the rest of the text
error_pos = e.args[1][1] - 1
fixed_text = text[0:error_pos]
yield from itertools.chain(
tokenize_text(fixed_text),
error_token_generator(text, error_pos + 1, len(text)),
)

def error_token_generator(text, start, end):
yield storage(
type=token.ERRORTOKEN, value=text[start:], begin=start, end=end
)

def _next(self):
try:
return next(self.iteritems)
except StopIteration:
return None
class peekable2(peekable):
"""
A peekable class which caches the last item returned by next()
"""

def lookahead2(self):
if len(self.items) <= self.position + 1:
self.items.append(self._next())
return self.items[self.position + 1]
def __init__(self, iterable):
super().__init__(iterable)
self.current_item = None

def __next__(self):
self.current_item = self.lookahead()
self.position += 1
self.current_item = super().__next__()
return self.current_item

tokens = BetterIter(get_tokens(text))
tokens = peekable2(get_tokens(text))

if tokens.lookahead().value in parens:
if tokens.peek().value in parens:
paren_expr()
else:
simple_expr()
Expand Down
1 change: 1 addition & 0 deletions web/test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""test utilities
(part of web.py)
"""

import doctest
import sys
import unittest
Expand Down

0 comments on commit d364932

Please sign in to comment.