diff --git a/weasyprint/__init__.py b/weasyprint/__init__.py index 44ab6f9a0..db0051680 100644 --- a/weasyprint/__init__.py +++ b/weasyprint/__init__.py @@ -29,7 +29,7 @@ import lxml.etree from .urls import (default_url_fetcher, wrap_url_fetcher, - path2url, ensure_url, url_is_absolute) + path2url, ensure_url, url_is_absolute, URLError) from .logger import LOGGER # Some import are at the end of the file (after the CSS class) is defined # to work around circular imports. @@ -77,7 +77,7 @@ def __init__(self, guess=None, filename=None, url=None, file_obj=None, source_type, source, base_url, protocol_encoding = _select_source( guess, filename, url, file_obj, string, tree, base_url, - url_fetcher) + url_fetcher, required=True) if source_type == 'tree': result = source @@ -241,7 +241,8 @@ def __init__(self, guess=None, filename=None, url=None, file_obj=None, def _select_source(guess=None, filename=None, url=None, file_obj=None, string=None, tree=None, base_url=None, - url_fetcher=default_url_fetcher, check_css_mime_type=False): + url_fetcher=default_url_fetcher, check_css_mime_type=False, + required=False): """ Check that only one input is not None, and return it with the normalized ``base_url``. @@ -261,26 +262,34 @@ def _select_source(guess=None, filename=None, url=None, file_obj=None, type_ = 'filename' return _select_source( base_url=base_url, url_fetcher=url_fetcher, - check_css_mime_type=check_css_mime_type, + check_css_mime_type=check_css_mime_type, required=required, **{type_: guess}) if nones == [True, False, True, True, True, True]: if base_url is None: base_url = path2url(filename) return 'filename', filename, base_url, None if nones == [True, True, False, True, True, True]: - result = url_fetcher(url) - if check_css_mime_type and result['mime_type'] != 'text/css': - LOGGER.warn( - 'Unsupported stylesheet type %s for %s', - result['mime_type'], result['redirected_url']) + try: + result = url_fetcher(url) + except URLError as e: + if not required: + LOGGER.warn('Failed to load resource %s: %s', url, e) return 'string', '', base_url, None - protocol_encoding = result.get('encoding') - if base_url is None: - base_url = result.get('redirected_url', url) - if 'string' in result: - return 'string', result['string'], base_url, protocol_encoding + else: + raise e else: - return 'file_obj', result['file_obj'], base_url, protocol_encoding + if check_css_mime_type and result['mime_type'] != 'text/css': + LOGGER.warn( + 'Unsupported stylesheet type %s for %s', + result['mime_type'], result['redirected_url']) + return 'string', '', base_url, None + protocol_encoding = result.get('encoding') + if base_url is None: + base_url = result.get('redirected_url', url) + if 'string' in result: + return 'string', result['string'], base_url, protocol_encoding + else: + return 'file_obj', result['file_obj'], base_url, protocol_encoding if nones == [True, True, True, False, True, True]: if base_url is None: # filesystem file-like objects have a 'name' attribute. diff --git a/weasyprint/compat.py b/weasyprint/compat.py index 5787839c1..2c86ecabd 100644 --- a/weasyprint/compat.py +++ b/weasyprint/compat.py @@ -16,7 +16,7 @@ import email -__all__ = ['Request', 'base64_decode', 'base64_encode', 'basestring', +__all__ = ['Request', 'URLError', 'base64_decode', 'base64_encode', 'basestring', 'ints_from_bytes', 'iteritems', 'izip', 'parse_email', 'parse_qs', 'pathname2url', 'quote', 'unicode', 'unquote', 'unquote_to_bytes', 'urlencode', 'urljoin', 'urlopen', 'urlopen_contenttype', @@ -29,6 +29,7 @@ urljoin, urlsplit, quote, unquote, unquote_to_bytes, parse_qs, urlencode, uses_relative as urlparse_uses_relative) from urllib.request import urlopen, Request, pathname2url + from urllib.error import URLError from array import array from base64 import (decodebytes as base64_decode, encodebytes as base64_encode) @@ -60,7 +61,7 @@ def ints_from_bytes(byte_string): # Python 2 from urlparse import (urljoin, urlsplit, parse_qs, uses_relative as urlparse_uses_relative) - from urllib2 import urlopen, Request + from urllib2 import urlopen, Request, URLError from urllib import pathname2url, quote, unquote, urlencode from array import array as _array from itertools import izip, imap diff --git a/weasyprint/urls.py b/weasyprint/urls.py index 25636dc55..56f77e240 100644 --- a/weasyprint/urls.py +++ b/weasyprint/urls.py @@ -22,7 +22,7 @@ from .logger import LOGGER from .compat import ( urljoin, urlsplit, quote, unquote, unquote_to_bytes, urlopen_contenttype, - Request, parse_email, pathname2url, unicode, base64_decode) + Request, parse_email, pathname2url, unicode, base64_decode, URLError) # Unlinke HTML, CSS and PNG, the SVG MIME type is not always builtin