Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support a standard api for parsing media types #376

Merged
merged 10 commits into from
Oct 10, 2018
6 changes: 6 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ Feature
The algorithm for matching offer ranges against header ranges is described
in the documentation. See https://github.com/Pylons/webob/pull/370

- Added ``acceptparse.Accept.parse_offer`` to codify what types of offers
are compatible with ``acceptparse.AcceptValidHeader.acceptable_offers``,
``acceptparse.AcceptMissingHeader.acceptable_offers``, and
``acceptparse.AcceptInvalidHeader.acceptable_offers``.
See https://github.com/Pylons/webob/pull/376

Compatibility
~~~~~~~~~~~~~

Expand Down
127 changes: 102 additions & 25 deletions src/webob/acceptparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
``Accept-Language``.
"""

from collections import namedtuple
import re
import textwrap
import warnings
Expand Down Expand Up @@ -74,6 +75,30 @@ def _list_1_or_more__compiled_re(element_re):
)


class AcceptOffer(namedtuple('AcceptOffer', ['type', 'subtype', 'params'])):
    """
    A parsed media type or media range offer.

    Behaves as the tuple ``(type, subtype, params)`` and additionally exposes
    the ``is_range`` and ``specificity`` properties.
    """

    # Keep instances as lightweight as a plain tuple (no per-instance dict).
    __slots__ = ()

    # Specificity levels, from least to most specific.
    SPECIFICITY_NONE = 1  # */*
    SPECIFICITY_TYPE = 2  # text/*
    SPECIFICITY_SUBTYPE = 3  # text/html
    SPECIFICITY_PARAMS = 4  # text/html;charset=utf8

    @property
    def is_range(self):
        """``True`` if the type or the subtype is the ``*`` wildcard."""
        return self.type == '*' or self.subtype == '*'

    @property
    def specificity(self):
        """
        Return how specific the offer is, as one of the ``SPECIFICITY_*``
        constants (1 for ``*/*`` up to 4 for an offer with parameters).
        """
        # Checked from most to least specific: any parameters win outright,
        # then a concrete subtype, then a concrete type.
        if self.params:
            return self.SPECIFICITY_PARAMS
        elif self.subtype != '*':
            return self.SPECIFICITY_SUBTYPE
        elif self.type != '*':
            return self.SPECIFICITY_TYPE
        else:
            return self.SPECIFICITY_NONE


class Accept(object):
"""
Represent an ``Accept`` header.
Expand Down Expand Up @@ -407,25 +432,70 @@ def generator(value):
)
return generator(value=value)

def _parse_and_normalize_offers(self, offers):
@classmethod
def parse_offer(cls, offer):
    """
    Parse an offer into its component parts.

    :param offer: A media type or range in the format
                  ``type/subtype[;params]``.
    :return: A named tuple containing ``(*type*, *subtype*, *params*)``.

             | *params* is a list containing ``(*parameter name*, *value*)``
               values.

             | The result also supports ``is_range`` and ``specificity``
               properties. Specificity is a value from 1 to 4 where ``*/*``
               is 1, ``text/*`` is 2, ``text/html`` is 3 and
               ``text/html;charset=utf8`` is 4.

    :raises ValueError: If the offer does not match the required format.

    """
    match = cls.media_type_compiled_re.match(offer)
    if not match:
        raise ValueError('Invalid value for an Accept offer.')

    groups = match.groups()
    offer_type, offer_subtype = groups[0].split('/')
    offer_params = cls._parse_media_type_params(
        media_type_params_segment=groups[1],
    )
    # Which combinations of wildcard type (A), wildcard subtype (B) and
    # non-empty params (C) are rejected:
    #
    #   A: type == *   B: subtype == *   C: params   invalid   example
    #        N               N              N           N      a/b
    #        N               N              Y           N      a/b;x=y
    #        N               Y              N           N      a/*
    #        N               Y              Y           Y      a/*;x=y
    #        Y               N              N           Y      */b
    #        Y               N              Y           Y      */b;x=y
    #        Y               Y              N           N      */*
    #        Y               Y              Y           Y      */*;x=y
    #
    # which simplifies to: (A and not B) or (B and C)
    if (
        (offer_type == '*' and offer_subtype != '*')
        or (offer_subtype == '*' and offer_params)
    ):
        raise ValueError('Invalid value for an Accept offer.')
    return AcceptOffer(offer_type, offer_subtype, offer_params)

@classmethod
def _parse_and_normalize_offers(cls, offers):
"""
Throw out any offers that do not match the media type ABNF.
Throw out any offers that do not match the media range ABNF.

:return: A list of offers split into the format ``[offer_index,
offer_type_subtype, offer_media_type_params]``.
parsed_offer]``.

"""
lowercased_offers_parsed = []
parsed_offers = []
for index, offer in enumerate(offers):
match = self.media_type_compiled_re.match(offer.lower())
# we're willing to try to match any offer that the media type
# grammar can parse, but we'll throw out anything that doesn't fit
# the correct syntax - this is not saying that the media type is
# actually a real media type, just that it looks like one
if match:
lowercased_offers_parsed.append([index] + list(match.groups()))
return lowercased_offers_parsed
try:
parsed_offer = cls.parse_offer(offer.lower())
except ValueError:
continue
parsed_offers.append([index, parsed_offer])
return parsed_offers


class AcceptValidHeader(Accept):
Expand Down Expand Up @@ -822,15 +892,10 @@ def acceptable_offers(self, offers):
lowercased_offers_parsed = self._parse_and_normalize_offers(offers)

acceptable_offers_n_quality_factors = {}
for (
offer_index, offer_type_subtype, offer_media_type_params
) in lowercased_offers_parsed:
for offer_index, parsed_offer in lowercased_offers_parsed:
offer = offers[offer_index]
offer_type, offer_subtype = offer_type_subtype.split('/', 1)
offer_media_type_params = self._parse_media_type_params(
media_type_params_segment=offer_media_type_params,
)
offer_is_range = '*' in offer
offer_is_range = parsed_offer.is_range
offer_type, offer_subtype, offer_media_type_params = parsed_offer
for (
range_type_subtype, range_qvalue, range_media_type_params, __,
) in lowercased_ranges:
Expand All @@ -841,8 +906,17 @@ def acceptable_offers(self, offers):
# highest matching qvalue
if offer_is_range:
if (
offer_type_subtype == '*/*'
or offer_type == range_type and offer_subtype == '*'
# Accept: anything, offer=*/*
(offer_type == '*' and offer_subtype == '*')

# Accept: text/anything, offer=text/*
or (offer_type == range_type and offer_subtype == '*')

# Accept: */*, offer=anything
or (
range_type == '*' and range_subtype == '*'
and range_media_type_params == []
)
):
prev_match = acceptable_offers_n_quality_factors.get(offer)
if not prev_match or prev_match[0] < range_qvalue:
Expand All @@ -860,7 +934,10 @@ def acceptable_offers(self, offers):
# items in reverse order, so specificity 4, 3, 2, 1 correspond
# to 1, 2, 3, 4 in the list, respectively (so that higher
# specificity has higher precedence).
elif offer_type_subtype == range_type_subtype:
elif (
offer_type == range_type
and offer_subtype == range_subtype
):
if range_media_type_params == []:
# If offer_media_type_params == [], the offer and the
# range match exactly, with neither having media type
Expand Down Expand Up @@ -1291,7 +1368,7 @@ def acceptable_offers(self, offers):
"""
return [
(offers[offer_index], 1.0)
for offer_index, _, _
for offer_index, _
# avoid returning any offers that don't match the grammar so
# that the return values here are consistent with what would be
# returned in AcceptValidHeader
Expand Down
40 changes: 40 additions & 0 deletions tests/test_acceptparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,41 @@ def test_parse__valid_header(self, value, expected_list):
list_of_returned = list(returned)
assert list_of_returned == expected_list

@pytest.mark.parametrize('offer, expected_return, is_range, specificity', [
    ['text/html', ('text', 'html', []), False, 3],
    [
        'text/html;charset=utf8',
        ('text', 'html', [('charset', 'utf8')]),
        False, 4,
    ],
    [
        'text/html;charset=utf8;x-version=1',
        ('text', 'html', [('charset', 'utf8'), ('x-version', '1')]),
        False, 4,
    ],
    ['text/*', ('text', '*', []), True, 2],
    ['*/*', ('*', '*', []), True, 1],
])
def test_parse_offer__valid(
    self, offer, expected_return, is_range, specificity,
):
    """Valid offers parse to the expected tuple and derived properties."""
    result = Accept.parse_offer(offer)
    # The result compares equal to a plain (type, subtype, params) tuple.
    assert result == expected_return
    assert result.is_range == is_range
    assert result.specificity == specificity

@pytest.mark.parametrize('offer', [
    # does not match the type/subtype grammar at all
    '',
    'foo',
    'foo/bar/baz',
    # wildcard type with a concrete subtype
    '*/plain',
    '*/plain;charset=utf8',
    '*/plain;charset=utf8;x-version=1',
    # wildcard subtype combined with parameters
    '*/*;charset=utf8',
    'text/*;charset=utf8',
])
def test_parse_offer__invalid(self, offer):
    """Malformed or disallowed offers are rejected with ``ValueError``."""
    with pytest.raises(ValueError):
        Accept.parse_offer(offer)


class TestAcceptValidHeader(object):
def test_parse__inherited(self):
Expand Down Expand Up @@ -1055,6 +1090,11 @@ def test_acceptable_offers__invalid_offers(
['text/*', '*/*', 'text/html', 'text/html;level=1', 'image/*'],
[('text/*', 0.7), ('*/*', 0.7), ('text/html;level=1', 0.7)],
),
(
'*/*',
['text/*'],
[('text/*', 1.0)],
),
(
'',
['text/*', '*/*', 'text/html', 'text/html;level=1', 'image/*'],
Expand Down