Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sitemap: sitemapindex template addition #7

Merged
merged 1 commit into from
Nov 11, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 68 additions & 4 deletions flask_sitemap/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,41 @@

from __future__ import absolute_import

import gzip
import sys

from collections import Mapping
from flask import current_app, request, Blueprint, render_template, url_for
from flask import current_app, request, Blueprint, render_template, url_for, \
Response
from flask.signals import Namespace
from functools import wraps
from itertools import islice
from werkzeug.utils import import_string


from . import config
from .version import __version__

# PY2/3 compatibility
#
# Exposes ``BytesIO``, ``string_types``, ``zip_longest`` and ``b`` under the
# same names on both major Python versions.
if sys.version_info[0] == 3:  # pragma: no cover
    import io
    from itertools import zip_longest

    BytesIO = io.BytesIO
    string_types = str,

    def b(s):
        """Return text ``s`` encoded as UTF-8 bytes.

        UTF-8 is what the XML declaration of the sitemap index template
        announces; latin-1 would fail for characters outside that range.
        """
        return s.encode("utf-8")
else:
    from cStringIO import StringIO as BytesIO
    from itertools import izip_longest as zip_longest

    string_types = basestring,

    def b(s):
        """Return a byte string, encoding ``unicode`` input as UTF-8."""
        return s.encode("utf-8") if isinstance(s, unicode) else s


# Signals
_signals = Namespace()

#: Sent while a sitemap index is being generated, once for every page that
#: will need to be generated later, together with its already calculated url
#: set.
sitemap_page_needed = _signals.signal('sitemap-page-needed')


class Sitemap(object):
Expand Down Expand Up @@ -88,6 +107,11 @@ def init_app(self, app):
'sitemap',
self._decorate(self.sitemap)
)
self.blueprint.add_url_rule(
app.config.get('SITEMAP_ENDPOINT_PAGE_URL'),
'page',
self._decorate(self.page)
)
app.register_blueprint(
self.blueprint,
url_prefix=app.config.get('SITEMAP_BLUEPRINT_URL_PREFIX')
Expand All @@ -105,8 +129,34 @@ def wrapper(*args, **kwargs):

def sitemap(self):
    """Generate sitemap.xml.

    Render a plain sitemap when there are fewer than
    ``SITEMAP_MAX_URL_COUNT`` urls (including none at all).  Otherwise
    render a sitemap index with one entry per page and send
    ``sitemap_page_needed`` with the urls of every page while the index
    is being rendered.
    """
    size = self.app.config['SITEMAP_MAX_URL_COUNT']
    # Repeating one iterator ``size`` times makes ``zip_longest`` yield
    # consecutive chunks of ``size`` urls; the last chunk is padded with
    # ``None``.
    args = [iter(self._generate_all_urls())] * size
    run = zip_longest(*args)
    # A default for ``next`` keeps an empty url set from escaping as an
    # unhandled StopIteration; an empty sitemap is rendered instead.
    urlset = next(run, (None,))

    if urlset[-1] is None:
        return render_template('flask_sitemap/sitemap.xml',
                               urlset=filter(None, urlset))

    def pages():
        """Yield one index entry per page, then announce that page."""
        app = current_app._get_current_object()
        yield {'loc': url_for('flask_sitemap.page', page=1)}
        sitemap_page_needed.send(app, page=1, urlset=urlset)
        for page, urlset_ in enumerate(run, 2):
            yield {'loc': url_for('flask_sitemap.page', page=page)}
            # Strip the ``None`` padding of the last chunk so receivers
            # only ever see real urls.
            sitemap_page_needed.send(
                app, page=page,
                urlset=tuple(url for url in urlset_ if url is not None))

    return render_template('flask_sitemap/sitemapindex.xml',
                           sitemaps=pages())

def page(self, page):
    """Generate sitemap for given range of urls.

    :param page: 1-based page number taken from the URL rule.
    :returns: gzipped response with the sitemap holding urls
        ``(page - 1) * SITEMAP_MAX_URL_COUNT`` up to
        ``page * SITEMAP_MAX_URL_COUNT`` (exclusive).

    Page numbers lower than 1 are answered with a 404 response.
    """
    from flask import abort

    if page < 1:
        # ``<int:page>`` also matches 0, which would hand a negative start
        # to ``islice`` and surface as a ValueError (HTTP 500).
        abort(404)

    size = self.app.config['SITEMAP_MAX_URL_COUNT']
    urlset = islice(self._generate_all_urls(), (page-1)*size, page*size)
    return self.gzip_response(
        render_template('flask_sitemap/sitemap.xml', urlset=urlset)
    )

def register_generator(self, generator):
"""Register an URL generator.
Expand Down Expand Up @@ -166,5 +216,19 @@ def _generate_all_urls(self):
result['loc'] = url_for(endpoint, **values)
yield result

def gzip_response(self, data):
    """GZip response data and create new Response instance.

    :param data: rendered document, either text or a byte string.
    :returns: ``Response`` whose body is the gzipped ``data`` and whose
        ``Content-Encoding`` and ``Content-Length`` headers are set
        accordingly.
    """
    if not isinstance(data, bytes):
        # Text is encoded as UTF-8, the encoding announced by the XML
        # declaration of the sitemap index template (the sitemap template
        # presumably declares the same); latin-1 or an implicit ASCII
        # conversion would fail on other characters.
        data = data.encode('utf-8')

    gzip_buffer = BytesIO()
    gzip_file = gzip.GzipFile(mode='wb', compresslevel=6,
                              fileobj=gzip_buffer)
    try:
        gzip_file.write(data)
    finally:
        # Closing writes the gzip trailer; it must happen before the
        # buffer is read and also when the write fails.
        gzip_file.close()

    response = Response()
    response.data = gzip_buffer.getvalue()
    response.headers['Content-Encoding'] = 'gzip'
    response.headers['Content-Length'] = len(response.data)

    return response


__all__ = ('Sitemap', '__version__')
27 changes: 26 additions & 1 deletion flask_sitemap/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,19 @@
SITEMAP_ENDPOINT_URL
--------------------

Default: ``/sitemap.xml``.
Return the sitemap index, or a plain sitemap when there are fewer than
``SITEMAP_MAX_URL_COUNT`` urls.

Default: ``sitemap.xml``.

SITEMAP_ENDPOINT_PAGE_URL
-------------------------

Return a GZipped sitemap for the given page of urls.

.. note:: It is strongly recommended to provide a caching decorator.

Default: ``sitemap<int:page>.xml.gz``

SITEMAP_INCLUDE_RULES_WITHOUT_PARAMS
------------------------------------
Expand All @@ -46,6 +58,15 @@
----------------------

Default: ``[]``.

SITEMAP_MAX_URL_COUNT
---------------------

The maximum number of urls per sitemap file. A single file may contain up to
50000 urls; however, there is also a 10MB size limit for the file.

Default: ``10000``.

"""

SITEMAP_BLUEPRINT = 'flask_sitemap'
Expand All @@ -54,10 +75,14 @@

SITEMAP_ENDPOINT_URL = 'sitemap.xml'

# URL rule of the endpoint serving one gzipped sitemap page; ``page`` is the
# page number passed to the view.
SITEMAP_ENDPOINT_PAGE_URL = 'sitemap<int:page>.xml.gz'

SITEMAP_URL_SCHEME = 'http'

SITEMAP_INCLUDE_RULES_WITHOUT_PARAMS = False

SITEMAP_IGNORE_ENDPOINTS = None

SITEMAP_VIEW_DECORATORS = []

# Number of urls per sitemap file; once at least this many urls exist a
# sitemap index is served instead of a single sitemap.
SITEMAP_MAX_URL_COUNT = 10000
11 changes: 11 additions & 0 deletions flask_sitemap/templates/flask_sitemap/sitemapindex.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{%- for sitemap in sitemaps %}
<sitemap>
<loc>{{ sitemap.loc }}</loc>
{%- if sitemap.lastmod %}
<lastmod>{{ sitemap.lastmod }}</lastmod>
{%- endif %}
</sitemap>
{%- endfor %}
</sitemapindex>
32 changes: 32 additions & 0 deletions tests/test_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,35 @@ def third(dummy):

with self.app.test_client() as c:
assert b('third') == c.get('/sitemap.xml').data

def test_pagination(self):
    """Serve a sitemap index and gzipped pages once the url limit is hit."""
    self.app.config['SERVER_NAME'] = 'www.example.com'
    self.app.config['SITEMAP_INCLUDE_RULES_WITHOUT_PARAMS'] = True
    # A low limit so the urls registered below do not fit into one file.
    self.app.config['SITEMAP_MAX_URL_COUNT'] = 10
    sitemap = Sitemap(app=self.app)

    @self.app.route('/')
    def index():
        pass

    @self.app.route('/first')
    def first():
        pass

    @self.app.route('/second')
    def second():
        pass

    @self.app.route('/<username>')
    def user(username):
        pass

    # Named differently from the ``user`` view so that it does not
    # redefine it.
    @sitemap.register_generator
    def generate_users():
        for number in range(20):
            yield 'user', {'username': 'test{0}'.format(number)}

    with self.app.test_client() as c:
        assert b('sitemapindex') in c.get('/sitemap.xml').data
        assert len(c.get('/sitemap1.xml.gz').data) > 0