Skip to content

Commit

Permalink
Sanitize RSS feed to remove control characters (#565)
Browse files Browse the repository at this point in the history
  • Loading branch information
sissbruecker authored Oct 27, 2023
1 parent 314e4a9 commit de328c7
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
13 changes: 10 additions & 3 deletions bookmarks/feeds.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import unicodedata
from dataclasses import dataclass

from django.contrib.syndication.views import Feed
from django.db.models import QuerySet
from django.urls import reverse

from bookmarks.models import Bookmark, BookmarkSearch, FeedToken
from bookmarks import queries
from bookmarks.models import Bookmark, BookmarkSearch, FeedToken


@dataclass
Expand All @@ -14,6 +15,12 @@ class FeedContext:
query_set: QuerySet[Bookmark]


def sanitize(text: str) -> str:
    """Return *text* with Unicode control characters removed.

    Every character whose Unicode general category starts with ``C`` is
    dropped, except newline, carriage return and tab, which are kept.

    A missing or empty value (``None`` or ``''``) yields ``''`` rather than
    raising ``TypeError``, so optional fields can be passed in directly.
    """
    if not text:
        return ''

    # Whitespace controls that are kept even though they are category "Cc".
    keep = '\n\r\t'
    return ''.join(
        ch for ch in text
        if ch in keep or unicodedata.category(ch)[0] != 'C'
    )


class BaseBookmarksFeed(Feed):
def get_object(self, request, feed_key: str):
feed_token = FeedToken.objects.get(key__exact=feed_key)
Expand All @@ -22,10 +29,10 @@ def get_object(self, request, feed_key: str):
return FeedContext(feed_token, query_set)

def item_title(self, item: Bookmark):
return item.resolved_title
return sanitize(item.resolved_title)

def item_description(self, item: Bookmark):
return item.resolved_description
return sanitize(item.resolved_description)

def item_link(self, item: Bookmark):
return item.url
Expand Down
8 changes: 8 additions & 0 deletions bookmarks/tests/test_feeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ def test_all_returns_only_user_owned_bookmarks(self):

self.assertContains(response, '<item>', count=0)

def test_strip_control_characters(self):
    """The feed drops NUL and backspace from title and description.

    Newline, carriage return and tab in the stored bookmark must still be
    present in the rendered ``<title>`` and ``<description>`` elements.
    """
    self.setup_bookmark(
        title='test\n\r\t\0\x08title',
        description='test\n\r\t\0\x08description',
    )
    response = self.client.get(reverse('bookmarks:feeds.all', args=[self.token.key]))
    self.assertEqual(response.status_code, 200)
    self.assertContains(response, '<item>', count=1)
    # Plain literals: these expected strings contain no placeholders.
    self.assertContains(response, '<title>test\n\r\ttitle</title>', count=1)
    self.assertContains(response, '<description>test\n\r\tdescription</description>', count=1)

def test_unread_returns_404_for_unknown_feed_token(self):
response = self.client.get(reverse('bookmarks:feeds.unread', args=['foo']))

Expand Down

0 comments on commit de328c7

Please sign in to comment.