Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support a standard api for parsing media types #376

Merged
merged 10 commits into from
Oct 10, 2018
9 changes: 5 additions & 4 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ Feature

- Add Request.remote_host, exposing REMOTE_HOST environment variable.

- Added support for media ranges as offers when matching against the
``Accept`` header via ``acceptparse.AcceptValidHeader.acceptable_offers``.
The algorithm for matching offer ranges against header ranges is described
in the documentation. See https://github.com/Pylons/webob/pull/370
- Added ``acceptparse.Accept.parse_offer`` to codify what types of offers
are compatible with ``acceptparse.AcceptValidHeader.acceptable_offers``,
``acceptparse.AcceptMissingHeader.acceptable_offers``, and
``acceptparse.AcceptInvalidHeader.acceptable_offers``.
See https://github.com/Pylons/webob/pull/376

Compatibility
~~~~~~~~~~~~~
Expand Down
101 changes: 54 additions & 47 deletions src/webob/acceptparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
``Accept-Language``.
"""

from collections import namedtuple
import re
import textwrap
import warnings
Expand Down Expand Up @@ -74,6 +75,9 @@ def _list_1_or_more__compiled_re(element_re):
)


AcceptOffer = namedtuple('AcceptOffer', ['type', 'subtype', 'params'])


class Accept(object):
"""
Represent an ``Accept`` header.
Expand Down Expand Up @@ -407,25 +411,51 @@ def generator(value):
)
return generator(value=value)

def _parse_and_normalize_offers(self, offers):
@classmethod
def parse_offer(cls, offer):
mmerickel marked this conversation as resolved.
Show resolved Hide resolved
"""
Throw out any offers that do not match the media type ABNF.
Parse an offer into its component parts.

:param offer: A media type in the format ``type/subtype[;params]``.
Media ranges (a ``*`` type or subtype) are not valid offers.
:return: A named tuple containing (*type*, *subtype*, *params*).

| *params* is a list of (*parameter name*, *value*) tuples.

:raises ValueError: If the offer does not match the required format.

"""
match = cls.media_type_compiled_re.match(offer.lower())
if not match:
raise ValueError('Invalid value for an Accept offer.')

groups = match.groups()
offer_type, offer_subtype = groups[0].split('/')
offer_params = cls._parse_media_type_params(
media_type_params_segment=groups[1],
)
if offer_type == '*' or offer_subtype == '*':
raise ValueError('Invalid value for an Accept offer.')
return AcceptOffer(offer_type, offer_subtype, offer_params)

@classmethod
def _parse_and_normalize_offers(cls, offers):
"""
Throw out any offers that cannot be parsed by :meth:`.Accept.parse_offer`.

:return: A list of offers split into the format ``[offer_index,
offer_type_subtype, offer_media_type_params]``.
parsed_offer]``.

"""
lowercased_offers_parsed = []
parsed_offers = []
for index, offer in enumerate(offers):
match = self.media_type_compiled_re.match(offer.lower())
# we're willing to try to match any offer that the media type grammar
# can parse, but we'll throw out anything that doesn't fit the correct
# syntax - this is not saying that the media type is actually a real
# media type, just that it looks like one
if match:
lowercased_offers_parsed.append([index] + list(match.groups()))
return lowercased_offers_parsed
try:
parsed_offer = cls.parse_offer(offer)
except ValueError:
continue
parsed_offers.append([index, parsed_offer])
return parsed_offers


class AcceptValidHeader(Accept):
Expand Down Expand Up @@ -790,12 +820,8 @@ def acceptable_offers(self, offers):
This uses the matching rules described in :rfc:`RFC 7231, section 5.3.2
<7231#section-5.3.2>`.

Any offers that do not match the media type grammar will be ignored.

This function also supports media ranges (without media type
parameters) but without any specificity. An offered media range is
assigned the highest q-value of any media range from the header that
would match any media type that could be derived from the offer.
Any offers that cannot be parsed via
:meth:`.Accept.parse_offer` will be ignored.

:param offers: ``iterable`` of ``str`` media types (media types can
include media type parameters)
Expand All @@ -822,45 +848,25 @@ def acceptable_offers(self, offers):
lowercased_offers_parsed = self._parse_and_normalize_offers(offers)

acceptable_offers_n_quality_factors = {}
for (
offer_index, offer_type_subtype, offer_media_type_params
) in lowercased_offers_parsed:
for offer_index, parsed_offer in lowercased_offers_parsed:
offer = offers[offer_index]
offer_type, offer_subtype = offer_type_subtype.split('/', 1)
offer_media_type_params = self._parse_media_type_params(
media_type_params_segment=offer_media_type_params,
)
offer_is_range = '*' in offer
offer_type, offer_subtype, offer_media_type_params = parsed_offer
for (
range_type_subtype, range_qvalue, range_media_type_params, __,
) in lowercased_ranges:
range_type, range_subtype = range_type_subtype.split('/', 1)

# if a media range is supplied as an offer then specificity is
# unimportant, we'll just compare for match and use the
# highest matching qvalue
if offer_is_range:
if (
offer_type_subtype == '*/*'
or offer_type == range_type and offer_subtype == '*'
):
prev_match = acceptable_offers_n_quality_factors.get(offer)
if not prev_match or prev_match[0] < range_qvalue:
acceptable_offers_n_quality_factors[offer] = (
range_qvalue, # qvalue of matched range
offer_index,
4, # unused for offers that are media ranges
)
continue

# The specificity values below are based on the list in the
# example in RFC 7231 section 5.3.2 explaining how "media
# ranges can be overridden by more specific media ranges or
# specific media types". We assign specificity to the list
# items in reverse order, so specificity 4, 3, 2, 1 correspond
# to 1, 2, 3, 4 in the list, respectively (so that higher
# specificity has higher precedence).
elif offer_type_subtype == range_type_subtype:
if (
offer_type == range_type
and offer_subtype == range_subtype
):
if range_media_type_params == []:
# If offer_media_type_params == [], the offer and the
# range match exactly, with neither having media type
Expand Down Expand Up @@ -1279,7 +1285,8 @@ def acceptable_offers(self, offers):
"""
Return the offers that are acceptable according to the header.

Any offers that do not match the media type grammar will be ignored.
Any offers that cannot be parsed via
:meth:`.Accept.parse_offer` will be ignored.

:param offers: ``iterable`` of ``str`` media types (media types can
include media type parameters)
Expand All @@ -1291,7 +1298,7 @@ def acceptable_offers(self, offers):
"""
return [
(offers[offer_index], 1.0)
for offer_index, _, _
for offer_index, _
# avoid returning any offers that don't match the grammar so
# that the return values here are consistent with what would be
# returned in AcceptValidHeader
Expand Down
45 changes: 38 additions & 7 deletions tests/test_acceptparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,37 @@ def test_parse__valid_header(self, value, expected_list):
list_of_returned = list(returned)
assert list_of_returned == expected_list

@pytest.mark.parametrize('offer, expected_return', [
['text/html', ('text', 'html', [])],
[
'text/html;charset=utf8',
('text', 'html', [('charset', 'utf8')]),
],
[
'text/html;charset=utf8;x-version=1',
('text', 'html', [('charset', 'utf8'), ('x-version', '1')]),
],
])
def test_parse_offer__valid(self, offer, expected_return):
result = Accept.parse_offer(offer)
assert result == expected_return

@pytest.mark.parametrize('offer', [
'',
'foo',
'foo/bar/baz',
'*/plain',
'*/plain;charset=utf8',
'*/plain;charset=utf8;x-version=1',
'*/*;charset=utf8',
'text/*;charset=utf8',
'text/*',
'*/*',
])
def test_parse_offer__invalid(self, offer):
with pytest.raises(ValueError):
Accept.parse_offer(offer)


class TestAcceptValidHeader(object):
def test_parse__inherited(self):
Expand Down Expand Up @@ -1047,20 +1078,20 @@ def test_acceptable_offers__invalid_offers(
('text/plain', 0.3),
],
),
(
'text/*;q=0.3, text/html;q=0.5, text/html;level=1;q=0.7',
['*/*', 'text/*', 'text/html', 'image/*'],
[('*/*', 0.7), ('text/*', 0.7), ('text/html', 0.5)],
),
(
'text/*;q=0.3, text/html;q=0.5, text/html;level=1;q=0.7',
['text/*', '*/*', 'text/html', 'image/*'],
[('text/*', 0.7), ('*/*', 0.7), ('text/html', 0.5)],
[('text/html', 0.5)],
),
(
'text/html;level=1;q=0.7',
['text/*', '*/*', 'text/html', 'text/html;level=1', 'image/*'],
[('text/*', 0.7), ('*/*', 0.7), ('text/html;level=1', 0.7)],
[('text/html;level=1', 0.7)],
),
(
'*/*',
['text/*'],
[],
),
(
'',
Expand Down