Skip to content

Commit

Permalink
sitemap: sitemapindex template addition
Browse files Browse the repository at this point in the history
* Adds support for sitemap pages.  (closes #3)

Signed-off-by: Jiri Kuncar <jiri.kuncar@cern.ch>
  • Loading branch information
jirikuncar committed Nov 11, 2014
1 parent 34e050f commit 2eca2d4
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 4 deletions.
68 changes: 65 additions & 3 deletions flask_sitemap/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@

from __future__ import absolute_import

import gzip
import sys

from collections import Mapping
from flask import current_app, request, Blueprint, render_template, url_for
from flask import current_app, request, Blueprint, render_template, url_for, \
Response
from flask.signals import Namespace
from functools import wraps
from itertools import islice
from werkzeug.utils import import_string


Expand All @@ -39,9 +43,22 @@

# PY2/3 compatibility
#
# Exposes three names with the same meaning on both interpreter lines:
#   StringIO     -- in-memory text buffer class
#   string_types -- tuple of types to test "is this a string" against
#   zip_longest  -- itertools' padding zip (named izip_longest on Python 2)
if sys.version_info[0] >= 3:
    import io
    StringIO = io.StringIO
    string_types = str,
    # Python 3 dropped the "i" prefix; izip_longest does not exist there.
    from itertools import zip_longest
else:
    from cStringIO import StringIO
    string_types = basestring,  # noqa: F821 (Python 2 only builtin)
    from itertools import izip_longest as zip_longest


# Signals
# Module-private namespace in which this extension's signals are created.
_signals = Namespace()

#: Sent when a sitemap index is generated and given page will need to be
#: generated in the future from already calculated url set.
#:
#: ``Sitemap.sitemap`` sends it once for every page it lists in the index.
#: The sender is the current application object and the keyword arguments
#: are ``page`` (page number, starting at 1) and ``urlset`` (the chunk of
#: urls computed for that page).
sitemap_page_needed = _signals.signal('sitemap-page-needed')


class Sitemap(object):
Expand Down Expand Up @@ -88,6 +105,11 @@ def init_app(self, app):
'sitemap',
self._decorate(self.sitemap)
)
self.blueprint.add_url_rule(
app.config.get('SITEMAP_ENDPOINT_PAGE_URL'),
'page',
self._decorate(self.page)
)
app.register_blueprint(
self.blueprint,
url_prefix=app.config.get('SITEMAP_BLUEPRINT_URL_PREFIX')
Expand All @@ -105,8 +127,34 @@ def wrapper(*args, **kwargs):

def sitemap(self):
    """Generate sitemap.xml.

    When all urls fit into a single file (fewer than
    ``SITEMAP_MAX_URL_COUNT`` of them, including none at all) the plain
    sitemap is rendered.  Otherwise a sitemap index is rendered with one
    entry per page, and ``sitemap_page_needed`` is sent for every page so
    that receivers can store the already calculated urls.

    :returns: rendered ``sitemap.xml`` or ``sitemapindex.xml`` template.
    """
    size = self.app.config['SITEMAP_MAX_URL_COUNT']
    # Repeating the *same* iterator ``size`` times makes zip_longest emit
    # consecutive chunks of ``size`` urls; the last chunk is padded with
    # ``None`` up to ``size`` items.
    chunks = zip_longest(*[iter(self._generate_all_urls())] * size)
    # ``None`` default: an empty url set must not leak StopIteration.
    first_chunk = next(chunks, None)

    if first_chunk is None or first_chunk[-1] is None:
        # Padding in the first chunk (or no chunk at all) means that
        # everything fits into one sitemap file.
        return render_template(
            'flask_sitemap/sitemap.xml',
            urlset=[url for url in first_chunk or () if url is not None])

    # Sitemap index entries have to be absolute urls.
    # NOTE(review): SITEMAP_URL_SCHEME is presumably the scheme intended
    # for generated urls -- confirm against _generate_all_urls.
    url_kwargs = {
        '_external': True,
        '_scheme': self.app.config.get('SITEMAP_URL_SCHEME'),
    }

    def pages():
        """Yield index entries and announce each page via the signal."""
        sender = current_app._get_current_object()
        number, chunk = 1, first_chunk
        while chunk is not None:
            yield {'loc': url_for('flask_sitemap.page', page=number,
                                  **url_kwargs)}
            # Strip the zip_longest padding so that receivers only ever
            # see real url entries.
            sitemap_page_needed.send(
                sender, page=number,
                urlset=[url for url in chunk if url is not None])
            number += 1
            chunk = next(chunks, None)

    return render_template('flask_sitemap/sitemapindex.xml',
                           sitemaps=pages())

def page(self, page):
    """Generate sitemap for given range of urls.

    :param page: page number as listed in the sitemap index, starting
        at 1.  Page ``n`` covers urls ``(n - 1) * size`` up to (but not
        including) ``n * size`` where ``size`` is
        ``SITEMAP_MAX_URL_COUNT``.
    :returns: gzipped response with the rendered ``sitemap.xml``.
    :raises werkzeug.exceptions.NotFound: if ``page`` is lower than 1.
    """
    if page < 1:
        # ``<int:page>`` also matches 0, which would hand a negative
        # start to islice and fail with ValueError (HTTP 500).
        from flask import abort
        abort(404)

    size = self.app.config['SITEMAP_MAX_URL_COUNT']
    start = (page - 1) * size
    urlset = islice(self._generate_all_urls(), start, start + size)
    return self.gzip_response(
        render_template('flask_sitemap/sitemap.xml', urlset=urlset)
    )

def register_generator(self, generator):
"""Register an URL generator.
Expand Down Expand Up @@ -166,5 +214,19 @@ def _generate_all_urls(self):
result['loc'] = url_for(endpoint, **values)
yield result

def gzip_response(self, data):
    """GZip response data and create new Response instance.

    :param data: body to compress, either bytes or text.  Text is
        encoded as UTF-8 first.
    :returns: ``Response`` whose body is the gzip-compressed ``data``,
        with ``Content-Encoding: gzip`` and a matching
        ``Content-Length`` header.
    """
    # gzip produces bytes, so the buffer has to be binary on both
    # Python 2 and 3 (a text StringIO raises TypeError on Python 3).
    from io import BytesIO

    if not isinstance(data, bytes):
        data = data.encode('utf-8')

    gzip_buffer = BytesIO()
    gzip_file = gzip.GzipFile(mode='wb', compresslevel=6,
                              fileobj=gzip_buffer)
    try:
        gzip_file.write(data)
    finally:
        # close() writes the gzip trailer; it must run before getvalue().
        gzip_file.close()

    response = Response()
    response.data = gzip_buffer.getvalue()
    response.headers['Content-Encoding'] = 'gzip'
    response.headers['Content-Length'] = len(response.data)

    return response


# Names exported by ``from flask_sitemap import *``.
__all__ = ('Sitemap', '__version__')
27 changes: 26 additions & 1 deletion flask_sitemap/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,19 @@
SITEMAP_ENDPOINT_URL
--------------------
Default: ``/sitemap.xml``.
Return the sitemap itself when there are fewer than
``SITEMAP_MAX_URL_COUNT`` urls, otherwise return a sitemap index.
Default: ``sitemap.xml``.
SITEMAP_ENDPOINT_PAGE_URL
-------------------------
Return GZipped sitemap for given page range of urls.
.. note:: It is strongly recommended to provide caching decorator.
Default: ``sitemap<int:page>.xml.gz``
SITEMAP_INCLUDE_RULES_WITHOUT_PARAMS
------------------------------------
Expand All @@ -46,6 +58,15 @@
----------------------
Default: ``[]``.
SITEMAP_MAX_URL_COUNT
---------------------
A single sitemap file may contain at most 50000 urls and, in addition,
must not be larger than 10MB.
Default: ``10000``.
"""

SITEMAP_BLUEPRINT = 'flask_sitemap'
Expand All @@ -54,10 +75,14 @@

# URL rule of the view serving the sitemap (or the sitemap index).
SITEMAP_ENDPOINT_URL = 'sitemap.xml'

# URL rule of the view serving one gzipped sitemap page; ``page`` is the
# page number used in the sitemap index.
SITEMAP_ENDPOINT_PAGE_URL = 'sitemap<int:page>.xml.gz'

SITEMAP_URL_SCHEME = 'http'

SITEMAP_INCLUDE_RULES_WITHOUT_PARAMS = False

SITEMAP_IGNORE_ENDPOINTS = None

SITEMAP_VIEW_DECORATORS = []

# Number of urls per sitemap file; once this many urls exist a sitemap
# index with separate pages is generated instead of a single sitemap.
SITEMAP_MAX_URL_COUNT = 10000
11 changes: 11 additions & 0 deletions flask_sitemap/templates/flask_sitemap/sitemapindex.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{%- for sitemap in sitemaps %}
<sitemap>
<loc>{{ sitemap.loc }}</loc>
{%- if sitemap.lastmod %}
<lastmod>{{ sitemap.lastmod }}</lastmod>
{%- endif %}
</sitemap>
{%- endfor %}
</sitemapindex>
33 changes: 33 additions & 0 deletions tests/test_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,36 @@ def third(dummy):

with self.app.test_client() as c:
assert b('third') == c.get('/sitemap.xml').data

def test_pagination(self):
    """Check that a big url set is split into an index and gzipped pages.

    With ``SITEMAP_MAX_URL_COUNT`` lowered to 10 and a generator that
    yields 20 urls, ``/sitemap.xml`` has to be a sitemap index and
    ``/sitemap1.xml.gz`` has to be served gzip-encoded.
    """
    self.app.config['SERVER_NAME'] = 'www.example.com'
    self.app.config['SITEMAP_INCLUDE_RULES_WITHOUT_PARAMS'] = True
    self.app.config['SITEMAP_MAX_URL_COUNT'] = 10
    sitemap = Sitemap(app=self.app)

    @self.app.route('/')
    def index():
        pass

    @self.app.route('/first')
    def first():
        pass

    @self.app.route('/second')
    def second():
        pass

    @self.app.route('/<username>')
    def user(username):
        pass

    # Named differently from the ``user`` view above so that the view
    # function is not silently redefined inside this test.
    @sitemap.register_generator
    def user_urls():
        for number in range(20):
            yield 'user', {'username': 'test{0}'.format(number)}

    with self.app.test_client() as c:
        assert b('sitemapindex') in c.get('/sitemap.xml').data

        response = c.get('/sitemap1.xml.gz')
        # Compared as a native string, not via ``b(...)``.
        # NOTE(review): assumes header values are native strings and
        # ``b`` returns bytes on Python 3 -- confirm.
        assert response.headers['Content-Encoding'] == 'gzip'
        # Every gzip stream starts with the magic bytes 1f 8b.
        assert response.data[:2] == b'\x1f\x8b'

0 comments on commit 2eca2d4

Please sign in to comment.