From 1d5654a4db7693fecbcc518acf20b4e8713b9735 Mon Sep 17 00:00:00 2001 From: Philippe MILINK Date: Sat, 4 Jun 2022 18:02:31 +0200 Subject: [PATCH] =?UTF-8?q?Supprime=20les=20caract=C3=A8res=20non=20suppor?= =?UTF-8?q?t=C3=A9s=20par=20les=20flux=20RSS=20et=20ATOM=20lors=20de=20leu?= =?UTF-8?q?r=20g=C3=A9n=C3=A9ration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Une erreur 500 UnserializableContentError est levée sinon. Détecté grâce à Sentry. --- zds/forum/feeds.py | 9 ++-- zds/forum/tests/tests_feeds.py | 37 ++++++++++++++++ zds/tutorialv2/feeds.py | 11 ++--- zds/tutorialv2/tests/tests_feeds.py | 65 +++++++++++++++++++++++++++++ zds/utils/feeds.py | 34 +++++++++++++++ 5 files changed, 147 insertions(+), 9 deletions(-) create mode 100644 zds/utils/feeds.py diff --git a/zds/forum/feeds.py b/zds/forum/feeds.py index aea0211e88..e1e718f4b3 100644 --- a/zds/forum/feeds.py +++ b/zds/forum/feeds.py @@ -1,10 +1,9 @@ from django.contrib.syndication.views import Feed - -from django.utils.feedgenerator import Atom1Feed from django.conf import settings from django.utils.timezone import make_aware from pytz import AmbiguousTimeError, NonExistentTimeError +from zds.utils.feeds import DropControlCharsRss201rev2Feed, DropControlCharsAtom1Feed from .models import Post, Topic @@ -41,6 +40,7 @@ class LastPostsFeedRSS(Feed, ItemMixin): title = "Derniers messages sur {}".format(settings.ZDS_APP["site"]["literal_name"]) link = "/forums/" description = "Les derniers messages parus sur le forum de {}.".format(settings.ZDS_APP["site"]["literal_name"]) + feed_type = DropControlCharsRss201rev2Feed def get_object(self, request): return request_object(request) @@ -65,7 +65,7 @@ def item_description(self, item): class LastPostsFeedATOM(LastPostsFeedRSS): - feed_type = Atom1Feed + feed_type = DropControlCharsAtom1Feed subtitle = LastPostsFeedRSS.description @@ -73,6 +73,7 @@ class LastTopicsFeedRSS(Feed, ItemMixin): title = "Derniers sujets sur {}".format(settings.ZDS_APP["site"]["literal_name"]) link = "/forums/" description = "Les derniers sujets créés sur le forum de {}.".format(settings.ZDS_APP["site"]["literal_name"]) + feed_type = DropControlCharsRss201rev2Feed def get_object(self, request): return request_object(request) @@ -97,5 +98,5 @@ def item_description(self, item): class LastTopicsFeedATOM(LastTopicsFeedRSS): - feed_type = Atom1Feed + feed_type = DropControlCharsAtom1Feed subtitle = LastTopicsFeedRSS.description diff --git a/zds/forum/tests/tests_feeds.py b/zds/forum/tests/tests_feeds.py index bb02b0e7e0..2439b1140b 100644 --- a/zds/forum/tests/tests_feeds.py +++ b/zds/forum/tests/tests_feeds.py @@ -139,6 +139,24 @@ def test_get_item_link(self): ret = self.topicfeed.item_link(item=topics[0]) self.assertEqual(ret, ref) + def test_content_control_chars(self): + """ + Test 'control characters' in content of the feed doesn't break it. + + The '\u0007' character in the post content belongs to a character + family that is not supported in RSS or Atom feeds and will break their + generation. + """ + buggy_topic = TopicFactory(forum=self.forum2, author=self.user) + buggy_topic.title = "Strange char: \u0007" + buggy_topic.save() + + request = self.client.get(reverse("topic-feed-rss")) + self.assertEqual(request.status_code, 200) + + request = self.client.get(reverse("topic-feed-atom")) + self.assertEqual(request.status_code, 200) + class LastPostsFeedTest(TestCase): def setUp(self): @@ -284,3 +302,22 @@ def test_get_item_link(self): posts = self.postfeed.items(obj={"tag": self.tag2.pk}) ret = self.postfeed.item_link(item=posts[0]) self.assertEqual(ret, ref) + + def test_content_control_chars(self): + """ + Test 'control characters' in content of the feed doesn't break it. + + The '\u0007' character in the post content belongs to a character + family that is not supported in RSS or Atom feeds and will break their + generation. + """ + buggy_topic = TopicFactory(forum=self.forum2, author=self.user) + post = PostFactory(topic=buggy_topic, author=self.user, position=1) + post.update_content("Strange char: \u0007") + post.save() + + request = self.client.get(reverse("post-feed-rss")) + self.assertEqual(request.status_code, 200) + + request = self.client.get(reverse("post-feed-atom")) + self.assertEqual(request.status_code, 200) diff --git a/zds/tutorialv2/feeds.py b/zds/tutorialv2/feeds.py index 95cca5ee52..d4bfeadb8c 100644 --- a/zds/tutorialv2/feeds.py +++ b/zds/tutorialv2/feeds.py @@ -2,10 +2,10 @@ from django.contrib.syndication.views import Feed from django.shortcuts import get_object_or_404 from django.utils.timezone import make_aware -from django.utils.feedgenerator import Atom1Feed from django.utils.translation import gettext_lazy as _ from pytz import AmbiguousTimeError, NonExistentTimeError +from zds.utils.feeds import DropControlCharsRss201rev2Feed, DropControlCharsAtom1Feed from zds.utils.models import Category, SubCategory, Tag from zds.utils.uuslug_wrapper import slugify from zds.tutorialv2.models.database import PublishedContent @@ -21,6 +21,7 @@ class LastContentFeedRSS(Feed): link = "" content_type = None query_params = {} + feed_type = DropControlCharsRss201rev2Feed def get_object(self, request, *args, **kwargs): self.query_params = request.GET @@ -80,7 +81,7 @@ def item_link(self, item): class LastContentFeedATOM(LastContentFeedRSS): - feed_type = Atom1Feed + feed_type = DropControlCharsAtom1Feed subtitle = LastContentFeedRSS.description @@ -96,7 +97,7 @@ class LastTutorialsFeedRSS(LastContentFeedRSS): class LastTutorialsFeedATOM(LastTutorialsFeedRSS): - feed_type = Atom1Feed + feed_type = DropControlCharsAtom1Feed subtitle = LastTutorialsFeedRSS.description @@ -112,7 +113,7 @@ class LastArticlesFeedRSS(LastContentFeedRSS): class LastArticlesFeedATOM(LastArticlesFeedRSS): - feed_type = Atom1Feed + feed_type = DropControlCharsAtom1Feed subtitle = LastArticlesFeedRSS.description @@ -128,5 +129,5 @@ class LastOpinionsFeedRSS(LastContentFeedRSS): class LastOpinionsFeedATOM(LastOpinionsFeedRSS): - feed_type = Atom1Feed + feed_type = DropControlCharsAtom1Feed subtitle = LastOpinionsFeedRSS.description diff --git a/zds/tutorialv2/tests/tests_feeds.py b/zds/tutorialv2/tests/tests_feeds.py index 278e3b9768..d165c5d73b 100644 --- a/zds/tutorialv2/tests/tests_feeds.py +++ b/zds/tutorialv2/tests/tests_feeds.py @@ -2,6 +2,7 @@ from django.http import Http404 from django.test import TestCase from django.test.utils import override_settings +from django.urls import reverse from zds.gallery.tests.factories import UserGalleryFactory from zds.member.tests.factories import ProfileFactory @@ -17,6 +18,7 @@ ) from zds.tutorialv2.tests.factories import ( PublishableContentFactory, + PublishedContentFactory, ContainerFactory, ExtractFactory, ) @@ -197,6 +199,27 @@ def test_filters(self): self.tutofeed.query_params = {"tag": "invalid"} self.assertRaises(Http404, self.tutofeed.items) + def test_content_control_chars(self): + """ + Test 'control characters' in content of the feed doesn't break it. + + The '\u0007' character in the post content belongs to a character + family that is not supported in RSS or Atom feeds and will break their + generation. + """ + buggy_tutorial = PublishedContentFactory( + author_list=[self.user_author], type="TUTORIAL", description="Strange char: \u0007" + ) + buggy_tutorial.subcategory.add(self.subcategory) + buggy_tutorial.tags.add(self.tag) + buggy_tutorial.save() + + request = self.client.get(reverse("tutorial:feed-rss")) + self.assertEqual(request.status_code, 200) + + request = self.client.get(reverse("tutorial:feed-atom")) + self.assertEqual(request.status_code, 200) + @override_settings(ZDS_APP=overridden_zds_app) class LastArticlesFeedsTest(TutorialTestMixin, TestCase): @@ -362,6 +385,27 @@ def test_filters(self): self.articlefeed.query_params = {"tag": "invalid"} self.assertRaises(Http404, self.articlefeed.items) + def test_content_control_chars(self): + """ + Test 'control characters' in content of the feed doesn't break it. + + The '\u0007' character in the post content belongs to a character + family that is not supported in RSS or Atom feeds and will break their + generation. + """ + buggy_article = PublishedContentFactory( + author_list=[self.user_author], type="ARTICLE", description="Strange char: \u0007" + ) + buggy_article.subcategory.add(self.subcategory) + buggy_article.tags.add(self.tag) + buggy_article.save() + + request = self.client.get(reverse("article:feed-rss")) + self.assertEqual(request.status_code, 200) + + request = self.client.get(reverse("article:feed-atom")) + self.assertEqual(request.status_code, 200) + @override_settings(ZDS_APP=overridden_zds_app) class LastOpinionsFeedsTest(TutorialTestMixin, TestCase): @@ -503,3 +547,24 @@ def test_filters(self): self.opinionfeed.query_params = {"tag": "invalid"} self.assertRaises(Http404, self.opinionfeed.items) + + def test_content_control_chars(self): + """ + Test 'control characters' in content of the feed doesn't break it. + + The '\u0007' character in the post content belongs to a character + family that is not supported in RSS or Atom feeds and will break their + generation. + """ + buggy_opinion = PublishedContentFactory( + author_list=[self.user_author], type="OPINION", description="Strange char: \u0007" + ) + buggy_opinion.subcategory.add(self.subcategory) + buggy_opinion.tags.add(self.tag) + buggy_opinion.save() + + request = self.client.get(reverse("opinion:feed-rss")) + self.assertEqual(request.status_code, 200) + + request = self.client.get(reverse("opinion:feed-atom")) + self.assertEqual(request.status_code, 200) diff --git a/zds/utils/feeds.py b/zds/utils/feeds.py new file mode 100644 index 0000000000..e05aa06d0a --- /dev/null +++ b/zds/utils/feeds.py @@ -0,0 +1,34 @@ +import re + +from django.utils.feedgenerator import Atom1Feed, Rss201rev2Feed +from django.utils.xmlutils import SimplerXMLGenerator + + +class DropControlCharsXMLGenerator(SimplerXMLGenerator): + def characters(self, content): + # From django.utils.xmlutils.SimplerXMLGenerator.characters() + super().characters(re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F]", "", content)) + + +class DropControlCharsRss201rev2Feed(Rss201rev2Feed): + def write(self, outfile, encoding): + # From django.utils.feedgenerator.RssFeed.write() + handler = DropControlCharsXMLGenerator(outfile, encoding) + handler.startDocument() + handler.startElement("rss", self.rss_attributes()) + handler.startElement("channel", self.root_attributes()) + self.add_root_elements(handler) + self.write_items(handler) + self.endChannelElement(handler) + handler.endElement("rss") + + +class DropControlCharsAtom1Feed(Atom1Feed): + def write(self, outfile, encoding): + # From django.utils.feedgenerator.Atom1Feed.write() + handler = DropControlCharsXMLGenerator(outfile, encoding) + handler.startDocument() + handler.startElement("feed", self.root_attributes()) + self.add_root_elements(handler) + self.write_items(handler) + handler.endElement("feed")