From ec43f8b3434686b75ccf95dc955a2a6a8242ef05 Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 14:55:30 +0900 Subject: [PATCH 01/11] rename FeedService::error to with_error --- src/server.rs | 2 +- src/server/feed_service.rs | 6 +++--- src/server/inspector.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/server.rs b/src/server.rs index df7326c..3e05fef 100644 --- a/src/server.rs +++ b/src/server.rs @@ -74,7 +74,7 @@ impl ServerConfig { info!("config updated, reloading service"); if !feed_service_clone.reload(&config_path_clone).await { feed_service_clone - .error(|e| { + .with_error(|e| { warn!("failed to reload config: {}", e); }) .await; diff --git a/src/server/feed_service.rs b/src/server/feed_service.rs index 96e8b0d..b570325 100644 --- a/src/server/feed_service.rs +++ b/src/server/feed_service.rs @@ -47,12 +47,12 @@ impl FeedService { }) } - pub async fn error( + pub async fn with_error( &self, - callback: impl FnOnce(&ConfigError) -> R, + f: impl FnOnce(&ConfigError) -> R, ) -> Option { let inner = self.inner.read().await; - inner.config_error.as_ref().map(callback) + inner.config_error.as_ref().map(f) } pub async fn root_config(&self) -> Arc { diff --git a/src/server/inspector.rs b/src/server/inspector.rs index cd2b61a..9476619 100644 --- a/src/server/inspector.rs +++ b/src/server/inspector.rs @@ -75,7 +75,7 @@ async fn config_handler( Extension(feed_service): Extension, ) -> impl IntoResponse { let json = json!({ - "config_error": feed_service.error(|e| e.to_string()).await, + "config_error": feed_service.with_error(|e| e.to_string()).await, "root_config": feed_service.root_config().await, }); Json(json) From b62c6a9e0baca924eb5ec8ec5fb9f574c0a02972 Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 15:49:29 +0900 Subject: [PATCH 02/11] implement feed format conversion --- src/feed.rs | 24 +++- src/feed/conversion.rs | 280 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 302 insertions(+), 2 
deletions(-) create mode 100644 src/feed/conversion.rs diff --git a/src/feed.rs b/src/feed.rs index c4407c2..9017971 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -1,3 +1,5 @@ +mod conversion; + use paste::paste; use schemars::JsonSchema; use serde::Deserialize; @@ -29,8 +31,6 @@ pub enum FeedFormat { } impl Feed { - // currently only used in tests. - #[cfg(test)] pub fn format(&self) -> FeedFormat { match self { Feed::Rss(_) => FeedFormat::Rss, @@ -38,6 +38,26 @@ impl Feed { } } + pub fn into_format(self, format: FeedFormat) -> Self { + use conversion::W; + + if self.format() == format { + return self; + } + + match (self, format) { + (Feed::Rss(channel), FeedFormat::Atom) => { + let feed: atom_syndication::Feed = W(channel).into(); + Feed::Atom(feed) + } + (Feed::Atom(feed), FeedFormat::Rss) => { + let channel: rss::Channel = W(feed).into(); + Feed::Rss(channel) + } + (original_self, _) => original_self, + } + } + pub fn from_rss_content(content: &[u8]) -> Result { let cursor = std::io::Cursor::new(content); let channel = rss::Channel::read_from(cursor)?; diff --git a/src/feed/conversion.rs b/src/feed/conversion.rs new file mode 100644 index 0000000..0da98e3 --- /dev/null +++ b/src/feed/conversion.rs @@ -0,0 +1,280 @@ +#![allow(clippy::field_reassign_with_default)] +// Utility wrapper type to get around orphan rules for implementing +// traits on foreign types. +pub(super) struct W(pub T); + +// The Extension struct and ExtensionMap type alias in the two +// libraries are defined identically. Therefore, we can convert +// between them by copying the fields. 
+ +impl From> + for atom_syndication::extension::Extension +{ + fn from(W(ext): W) -> Self { + let rss::extension::Extension { + name, + value, + attrs, + children, + } = ext; + + let children = children + .into_iter() + .map(|(k, v)| (k, v.into_iter().map(|e| W(e).into()).collect())) + .collect(); + + atom_syndication::extension::Extension { + name, + value, + attrs, + children, + } + } +} + +impl From> + for rss::extension::Extension +{ + fn from(W(ext): W) -> Self { + let atom_syndication::extension::Extension { + name, + value, + attrs, + children, + } = ext; + + let children = children + .into_iter() + .map(|(k, v)| (k, v.into_iter().map(|e| W(e).into()).collect())) + .collect(); + + rss::extension::Extension { + name, + value, + attrs, + children, + } + } +} + +impl From> + for rss::extension::ExtensionMap +{ + fn from(W(ext): W) -> Self { + ext + .into_iter() + .map(|(k, v)| { + let v = v + .into_iter() + .map(|(k, v)| (k, v.into_iter().map(|e| W(e).into()).collect())) + .collect(); + (k, v) + }) + .collect() + } +} + +impl From> + for atom_syndication::extension::ExtensionMap +{ + fn from(W(ext): W) -> Self { + ext + .into_iter() + .map(|(k, v)| { + let v = v + .into_iter() + .map(|(k, v)| (k, v.into_iter().map(|e| W(e).into()).collect())) + .collect(); + (k, v) + }) + .collect() + } +} + +impl From> for atom_syndication::Feed { + fn from(W(channel): W) -> Self { + use atom_syndication::{Category, FixedDateTime, Generator, Link, Person}; + let parse_date = |s: &str| FixedDateTime::parse_from_rfc3339(s).ok(); + + let mut feed = atom_syndication::Feed::default(); + + // Title and ID already set + feed.title = channel.title.into(); + feed.id = channel.link.clone(); + + // Updated - using last_build_date if available, otherwise pub_date + feed.updated = channel + .last_build_date + .as_deref() + .or(channel.pub_date.as_deref()) + .and_then(parse_date) + .unwrap_or_default(); + + // Authors - Assuming managing_editor as the author if available + if let 
Some(editor) = channel.managing_editor { + let mut person = Person::default(); + person.name = editor; + feed.authors.push(person); + } + + // Links - Primary link to the channel's website + let mut link = Link::default(); + link.href = channel.link; + feed.links.push(link); + + // Categories + for category in channel.categories { + let mut cat = Category::default(); + cat.term = category.name; + feed.categories.push(cat); + } + + // Generator - Assuming it's a simple string without version or uri + if let Some(generator_str) = channel.generator { + let generator = Generator { + value: generator_str, + version: None, + uri: None, + }; + feed.generator = Some(generator); + } + + // Language as lang + feed.lang = channel.language; + + // Subtitle as a description + if !channel.description.is_empty() { + feed.subtitle = Some(channel.description.into()); + } + + feed.extensions = W(channel.extensions).into(); + + // Entries + feed.entries = channel + .items + .into_iter() + .map(W) + .map(atom_syndication::Entry::from) + .collect(); + + feed + } +} + +impl From> for atom_syndication::Entry { + fn from(W(item): W) -> Self { + use atom_syndication::{Content, Entry, FixedDateTime, Link, Person, Text}; + + let parse_date = |s: &str| FixedDateTime::parse_from_rfc3339(s).ok(); + + let mut entry = Entry::default(); + + entry.title = item.title.map_or_else(Text::default, |t| t.into()); + entry.id = item.guid.map_or_else(String::default, |g| g.value); + + if let Some(pub_date) = item.pub_date.as_deref().and_then(parse_date) { + entry.updated = pub_date; + entry.published = Some(pub_date); + } else { + entry.updated = FixedDateTime::default(); + } + + if let Some(author_email) = item.author { + let mut person = Person::default(); + person.name = author_email; + entry.authors.push(person); + } + + item.categories.into_iter().for_each(|cat| { + let mut category = atom_syndication::Category::default(); + category.term = cat.name; + entry.categories.push(category); + }); + + if let 
Some(link) = item.link.as_ref() { + let mut atom_link = Link::default(); + atom_link.href = link.clone(); + entry.links.push(atom_link); + } + + entry.summary = item.description.map(|d| d.into()); + + if let Some(content) = item.content { + let mut atom_content = Content::default(); + atom_content.value = Some(content); + entry.content = Some(atom_content); + } + + entry.extensions = W(item.extensions).into(); + + entry + } +} + +impl From> for rss::Item { + fn from(W(entry): W) -> Self { + let mut item = rss::Item::default(); + + item.title = Some(entry.title.as_str().to_owned()); + item.link = entry.links.first().map(|l| l.href.clone()); + item.description = entry.summary.map(|s| s.as_str().to_owned()); + item.author = entry.authors.first().map(|a| a.name.clone()); + item.pub_date = entry.published.map(|d| d.to_rfc2822()); + item.guid = Some(rss::Guid { + value: entry.id, + permalink: false, + }); + item.content = entry.content.and_then(|c| c.value); + + item.extensions = W(entry.extensions).into(); + + item.categories = entry + .categories + .into_iter() + .map(|c| rss::Category { + name: c.term, + domain: None, + }) + .collect(); + + item + } +} + +impl From> for rss::Channel { + fn from(W(feed): W) -> Self { + let mut channel = rss::Channel::default(); + + channel.title = feed.title.as_str().to_owned(); + channel.link = feed + .links + .first() + .map_or(String::default(), |l| l.href.clone()); + channel.description = feed + .subtitle + .map_or(String::default(), |s| s.as_str().to_owned()); + channel.last_build_date = Some(feed.updated.to_rfc2822()); + channel.language = feed.lang; + channel.generator = feed.generator.map(|g| g.value); + + channel.items = feed + .entries + .into_iter() + .map(W) + .map(rss::Item::from) + .collect(); + + channel.extensions = W(feed.extensions).into(); + + channel.categories = feed + .categories + .into_iter() + .map(|c| rss::Category { + name: c.term, + domain: None, + }) + .collect(); + + channel + } +} From 
1e8b746cbdb59e2f7bd7c8119c1f4b18259352d1 Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 16:02:07 +0900 Subject: [PATCH 03/11] implement the conversion filters --- src/feed.rs | 2 +- src/filter.rs | 1 + src/filter/convert.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 src/filter/convert.rs diff --git a/src/feed.rs b/src/feed.rs index 9017971..d41b32e 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -20,7 +20,7 @@ pub enum Feed { } #[derive( - JsonSchema, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, + JsonSchema, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, Copy, )] #[serde(rename_all = "lowercase")] pub enum FeedFormat { diff --git a/src/filter.rs b/src/filter.rs index e6bbacf..15ed46d 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -1,4 +1,5 @@ mod full_text; +mod convert; mod highlight; mod html; mod js; diff --git a/src/filter/convert.rs b/src/filter/convert.rs new file mode 100644 index 0000000..ad4fc1e --- /dev/null +++ b/src/filter/convert.rs @@ -0,0 +1,40 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::{ + feed::{Feed, FeedFormat}, + util::{ConfigError, Result}, +}; + +use super::{FeedFilter, FeedFilterConfig, FilterContext}; + +#[derive( + JsonSchema, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, +)] +#[serde(transparent)] +/// Convert feed to another format +struct ConvertToConfig { + format: FeedFormat, +} + +struct ConvertTo { + format: FeedFormat, +} + +#[async_trait::async_trait] +impl FeedFilterConfig for ConvertToConfig { + type Filter = ConvertTo; + + async fn build(self) -> Result { + Ok(ConvertTo { + format: self.format, + }) + } +} + +#[async_trait::async_trait] +impl FeedFilter for ConvertTo { + async fn run(&self, _ctx: &mut FilterContext, feed: Feed) -> Result { + Ok(feed.into_format(self.format)) + } +} From 7d240e25a6a82e8a96b673341e77de14b9152c99 Mon Sep 17 00:00:00 2001 From: shouya 
Date: Tue, 5 Mar 2024 19:29:05 +0900 Subject: [PATCH 04/11] register convert_to filter --- src/filter.rs | 3 ++- src/filter/convert.rs | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/filter.rs b/src/filter.rs index 15ed46d..fa21a35 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -1,5 +1,5 @@ -mod full_text; mod convert; +mod full_text; mod highlight; mod html; mod js; @@ -187,4 +187,5 @@ define_filters!( Highlight => highlight::HighlightConfig, "Highlight text or pattern"; Merge => merge::MergeConfig, "Merge extra feed into the main feed"; Note => note::NoteFilterConfig, "Add non-functional comment"; + ConvertTo => convert::ConvertToConfig, "Convert feed to another format"; ); diff --git a/src/filter/convert.rs b/src/filter/convert.rs index ad4fc1e..2bd47c4 100644 --- a/src/filter/convert.rs +++ b/src/filter/convert.rs @@ -13,11 +13,11 @@ use super::{FeedFilter, FeedFilterConfig, FilterContext}; )] #[serde(transparent)] /// Convert feed to another format -struct ConvertToConfig { +pub struct ConvertToConfig { format: FeedFormat, } -struct ConvertTo { +pub struct ConvertTo { format: FeedFormat, } From 0a86bb9a9553ff0d6656480ef7124511207d227b Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 21:23:47 +0900 Subject: [PATCH 05/11] reduce cloning in conversion module --- src/feed/conversion.rs | 122 ++++++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 57 deletions(-) diff --git a/src/feed/conversion.rs b/src/feed/conversion.rs index 0da98e3..28e2939 100644 --- a/src/feed/conversion.rs +++ b/src/feed/conversion.rs @@ -1,4 +1,5 @@ #![allow(clippy::field_reassign_with_default)] + // Utility wrapper type to get around orphan rules for implementing // traits on foreign types. 
pub(super) struct W(pub T); @@ -23,7 +24,7 @@ impl From> .map(|(k, v)| (k, v.into_iter().map(|e| W(e).into()).collect())) .collect(); - atom_syndication::extension::Extension { + Self { name, value, attrs, @@ -45,10 +46,10 @@ impl From> let children = children .into_iter() - .map(|(k, v)| (k, v.into_iter().map(|e| W(e).into()).collect())) + .map(|(k, v)| (k, v.into_iter().map(W).map(Into::into).collect())) .collect(); - rss::extension::Extension { + Self { name, value, attrs, @@ -66,7 +67,7 @@ impl From> .map(|(k, v)| { let v = v .into_iter() - .map(|(k, v)| (k, v.into_iter().map(|e| W(e).into()).collect())) + .map(|(k, v)| (k, v.into_iter().map(W).map(Into::into).collect())) .collect(); (k, v) }) @@ -83,7 +84,7 @@ impl From> .map(|(k, v)| { let v = v .into_iter() - .map(|(k, v)| (k, v.into_iter().map(|e| W(e).into()).collect())) + .map(|(k, v)| (k, v.into_iter().map(W).map(Into::into).collect())) .collect(); (k, v) }) @@ -93,12 +94,14 @@ impl From> impl From> for atom_syndication::Feed { fn from(W(channel): W) -> Self { - use atom_syndication::{Category, FixedDateTime, Generator, Link, Person}; + use atom_syndication::{ + Category, FixedDateTime, Generator, Link, Person, Text, + }; + let parse_date = |s: &str| FixedDateTime::parse_from_rfc3339(s).ok(); - let mut feed = atom_syndication::Feed::default(); + let mut feed = Self::default(); - // Title and ID already set feed.title = channel.title.into(); feed.id = channel.link.clone(); @@ -112,50 +115,56 @@ impl From> for atom_syndication::Feed { // Authors - Assuming managing_editor as the author if available if let Some(editor) = channel.managing_editor { - let mut person = Person::default(); - person.name = editor; + let person = Person { + name: editor, + ..Default::default() + }; feed.authors.push(person); } // Links - Primary link to the channel's website - let mut link = Link::default(); - link.href = channel.link; + let link = Link { + href: channel.link, + ..Default::default() + }; feed.links.push(link); // 
Categories - for category in channel.categories { - let mut cat = Category::default(); - cat.term = category.name; - feed.categories.push(cat); - } + feed.categories = channel + .categories + .into_iter() + .map(|category| Category { + term: category.name, + ..Default::default() + }) + .collect(); // Generator - Assuming it's a simple string without version or uri - if let Some(generator_str) = channel.generator { - let generator = Generator { - value: generator_str, - version: None, - uri: None, - }; - feed.generator = Some(generator); - } + feed.generator = channel.generator.map(|value| Generator { + value, + ..Default::default() + }); // Language as lang feed.lang = channel.language; // Subtitle as a description - if !channel.description.is_empty() { - feed.subtitle = Some(channel.description.into()); + feed.subtitle = Some(channel.description.into()); + + // Rights + feed.rights = channel.copyright.as_deref().map(Text::from); + + // Image and logo + if let Some(image) = channel.image { + feed.icon = Some(image.url.clone()); + feed.logo = Some(image.url); } + // Extensions feed.extensions = W(channel.extensions).into(); // Entries - feed.entries = channel - .items - .into_iter() - .map(W) - .map(atom_syndication::Entry::from) - .collect(); + feed.entries = channel.items.into_iter().map(W).map(Into::into).collect(); feed } @@ -170,7 +179,11 @@ impl From> for atom_syndication::Entry { let mut entry = Entry::default(); entry.title = item.title.map_or_else(Text::default, |t| t.into()); - entry.id = item.guid.map_or_else(String::default, |g| g.value); + entry.id = item + .guid + .map(|g| g.value) + .or_else(|| item.link.clone()) + .unwrap_or_default(); if let Some(pub_date) = item.pub_date.as_deref().and_then(parse_date) { entry.updated = pub_date; @@ -191,9 +204,9 @@ impl From> for atom_syndication::Entry { entry.categories.push(category); }); - if let Some(link) = item.link.as_ref() { + if let Some(link) = item.link { let mut atom_link = Link::default(); - 
atom_link.href = link.clone(); + atom_link.href = link; entry.links.push(atom_link); } @@ -215,10 +228,10 @@ impl From> for rss::Item { fn from(W(entry): W) -> Self { let mut item = rss::Item::default(); - item.title = Some(entry.title.as_str().to_owned()); - item.link = entry.links.first().map(|l| l.href.clone()); - item.description = entry.summary.map(|s| s.as_str().to_owned()); - item.author = entry.authors.first().map(|a| a.name.clone()); + item.title = Some(entry.title.value); + item.link = entry.links.into_iter().next().map(|l| l.href); + item.description = entry.summary.map(|s| s.value); + item.author = entry.authors.into_iter().next().map(|a| a.name); item.pub_date = entry.published.map(|d| d.to_rfc2822()); item.guid = Some(rss::Guid { value: entry.id, @@ -233,7 +246,7 @@ impl From> for rss::Item { .into_iter() .map(|c| rss::Category { name: c.term, - domain: None, + ..Default::default() }) .collect(); @@ -243,27 +256,22 @@ impl From> for rss::Item { impl From> for rss::Channel { fn from(W(feed): W) -> Self { - let mut channel = rss::Channel::default(); + let mut channel = Self::default(); channel.title = feed.title.as_str().to_owned(); channel.link = feed .links - .first() - .map_or(String::default(), |l| l.href.clone()); - channel.description = feed - .subtitle - .map_or(String::default(), |s| s.as_str().to_owned()); - channel.last_build_date = Some(feed.updated.to_rfc2822()); + .into_iter() + .next() + .map_or_else(String::default, |l| l.href); + channel.description = + feed.subtitle.as_deref().unwrap_or_default().to_owned(); + if feed.updated.timestamp() != 0 { + channel.last_build_date = Some(feed.updated.to_rfc2822()); + } channel.language = feed.lang; channel.generator = feed.generator.map(|g| g.value); - - channel.items = feed - .entries - .into_iter() - .map(W) - .map(rss::Item::from) - .collect(); - + channel.items = feed.entries.into_iter().map(W).map(Into::into).collect(); channel.extensions = W(feed.extensions).into(); channel.categories = feed 
@@ -271,7 +279,7 @@ impl From> for rss::Channel { .into_iter() .map(|c| rss::Category { name: c.term, - domain: None, + ..Default::default() }) .collect(); From 57fdce60ef62f9f37dd7ec999e86ecb2446f0626 Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 21:24:54 +0900 Subject: [PATCH 06/11] add disclaimer --- src/feed/conversion.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/feed/conversion.rs b/src/feed/conversion.rs index 28e2939..90cda5a 100644 --- a/src/feed/conversion.rs +++ b/src/feed/conversion.rs @@ -1,3 +1,6 @@ +// Disclaimer: A large part of this module was written using +// generative AI. + #![allow(clippy::field_reassign_with_default)] // Utility wrapper type to get around orphan rules for implementing From 737c07957d8638fa972b390fbb1c81339c2c34ee Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 21:48:34 +0900 Subject: [PATCH 07/11] add test for convert_to filter --- fixtures/minimal_rss_20.xml | 14 ++++++++ src/feed.rs | 26 ++++++++++++++- src/filter/convert.rs | 64 +++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 fixtures/minimal_rss_20.xml diff --git a/fixtures/minimal_rss_20.xml b/fixtures/minimal_rss_20.xml new file mode 100644 index 0000000..806f616 --- /dev/null +++ b/fixtures/minimal_rss_20.xml @@ -0,0 +1,14 @@ + + + Test + http://example.com + Test description + + + Item 1 + http://example.com/item1 + http://example.com/item1 + Item 1 description + + + diff --git a/src/feed.rs b/src/feed.rs index d41b32e..f17f6f8 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -12,7 +12,7 @@ use crate::source::FromScratch; use crate::util::Error; use crate::util::Result; -#[derive(Serialize, Deserialize, Clone, Debug)] +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] #[serde(untagged)] pub enum Feed { Rss(rss::Channel), @@ -210,6 +210,30 @@ impl Feed { } } +#[cfg(test)] +impl TryFrom for rss::Channel { + type Error = (); + + fn try_from(feed: Feed) -> Result { + 
match feed { + Feed::Rss(channel) => Ok(channel), + _ => Err(()), + } + } +} + +#[cfg(test)] +impl TryFrom for atom_syndication::Feed { + type Error = (); + + fn try_from(feed: Feed) -> Result { + match feed { + Feed::Atom(feed) => Ok(feed), + _ => Err(()), + } + } +} + impl From<&FromScratch> for Feed { fn from(config: &FromScratch) -> Self { use FeedFormat::*; diff --git a/src/filter/convert.rs b/src/filter/convert.rs index 2bd47c4..dbf6015 100644 --- a/src/filter/convert.rs +++ b/src/filter/convert.rs @@ -38,3 +38,67 @@ impl FeedFilter for ConvertTo { Ok(feed.into_format(self.format)) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_utils::fetch_endpoint; + use crate::util::Result; + + #[tokio::test] + async fn test_convert_to() -> Result<()> { + let config = r#" + !endpoint + path: /feed.xml + source: fixture:///minimal_rss_20.xml + filters: + - convert_to: atom + "#; + + let feed = fetch_endpoint(config, "").await; + assert_eq!(feed.format(), FeedFormat::Atom); + + let feed: atom_syndication::Feed = feed.try_into().unwrap(); + + assert_eq!(feed.title.as_str(), "Test"); + assert_eq!(feed.links[0].href, "http://example.com"); + assert_eq!( + feed.subtitle.as_ref().map(|e| e.as_str()), + Some("Test description") + ); + assert_eq!(feed.entries.len(), 1); + let post = feed.entries.into_iter().next().unwrap(); + assert_eq!(post.title.as_str(), "Item 1"); + assert_eq!( + post.links.iter().map(|l| &l.href).collect::>(), + vec!["http://example.com/item1"] + ); + assert_eq!( + post.summary.as_ref().map(|s| s.as_str()), + Some("Item 1 description") + ); + Ok(()) + } + + #[tokio::test] + async fn test_rss_to_atom_to_rss() { + let config_1 = r#" + !endpoint + path: /feed.xml + source: fixture:///minimal_rss_20.xml + filters: + - convert_to: atom + - convert_to: rss + "#; + let config_2 = r#" + !endpoint + path: /feed.xml + source: fixture:///minimal_rss_20.xml + filters: [] + "#; + + let feed_1 = fetch_endpoint(config_1, "").await; + let feed_2 = 
fetch_endpoint(config_2, "").await; + assert_eq!(feed_1, feed_2); + } +} From 6dc1ee321821c6313499ed4eccc1cd67c38145a4 Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 21:57:29 +0900 Subject: [PATCH 08/11] convert more fields correctly --- src/feed/conversion.rs | 76 ++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/src/feed/conversion.rs b/src/feed/conversion.rs index 90cda5a..4ac774f 100644 --- a/src/feed/conversion.rs +++ b/src/feed/conversion.rs @@ -1,6 +1,3 @@ -// Disclaimer: A large part of this module was written using -// generative AI. - #![allow(clippy::field_reassign_with_default)] // Utility wrapper type to get around orphan rules for implementing @@ -101,7 +98,7 @@ impl From> for atom_syndication::Feed { Category, FixedDateTime, Generator, Link, Person, Text, }; - let parse_date = |s: &str| FixedDateTime::parse_from_rfc3339(s).ok(); + let parse_date = |s: &str| FixedDateTime::parse_from_rfc2822(dbg!(s)).ok(); let mut feed = Self::default(); @@ -111,8 +108,8 @@ impl From> for atom_syndication::Feed { // Updated - using last_build_date if available, otherwise pub_date feed.updated = channel .last_build_date + .or(channel.pub_date) .as_deref() - .or(channel.pub_date.as_deref()) .and_then(parse_date) .unwrap_or_default(); @@ -173,11 +170,45 @@ impl From> for atom_syndication::Feed { } } +impl From> for rss::Channel { + fn from(W(feed): W) -> Self { + let mut channel = Self::default(); + + channel.title = feed.title.as_str().to_owned(); + channel.link = feed + .links + .into_iter() + .next() + .map_or_else(String::default, |l| l.href); + channel.description = + feed.subtitle.as_deref().unwrap_or_default().to_owned(); + if feed.updated.timestamp() != 0 { + channel.last_build_date = Some(feed.updated.to_rfc2822()); + } + channel.language = feed.lang; + channel.generator = feed.generator.map(|g| g.value); + channel.items = feed.entries.into_iter().map(W).map(Into::into).collect(); + 
channel.extensions = W(feed.extensions).into(); + channel.managing_editor = feed.authors.into_iter().next().map(|a| a.name); + + channel.categories = feed + .categories + .into_iter() + .map(|c| rss::Category { + name: c.term, + ..Default::default() + }) + .collect(); + + channel + } +} + impl From> for atom_syndication::Entry { fn from(W(item): W) -> Self { use atom_syndication::{Content, Entry, FixedDateTime, Link, Person, Text}; - let parse_date = |s: &str| FixedDateTime::parse_from_rfc3339(s).ok(); + let parse_date = |s: &str| FixedDateTime::parse_from_rfc2822(s).ok(); let mut entry = Entry::default(); @@ -256,36 +287,3 @@ impl From> for rss::Item { item } } - -impl From> for rss::Channel { - fn from(W(feed): W) -> Self { - let mut channel = Self::default(); - - channel.title = feed.title.as_str().to_owned(); - channel.link = feed - .links - .into_iter() - .next() - .map_or_else(String::default, |l| l.href); - channel.description = - feed.subtitle.as_deref().unwrap_or_default().to_owned(); - if feed.updated.timestamp() != 0 { - channel.last_build_date = Some(feed.updated.to_rfc2822()); - } - channel.language = feed.lang; - channel.generator = feed.generator.map(|g| g.value); - channel.items = feed.entries.into_iter().map(W).map(Into::into).collect(); - channel.extensions = W(feed.extensions).into(); - - channel.categories = feed - .categories - .into_iter() - .map(|c| rss::Category { - name: c.term, - ..Default::default() - }) - .collect(); - - channel - } -} From e4bd6e6e212d236ecd0f43bbdf4dd121c8db5334 Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 21:59:02 +0900 Subject: [PATCH 09/11] test for atom->rss->atom --- fixtures/sample_atom.xml | 39 +++++++++++++++++++++++++++++++++++++++ src/filter/convert.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 fixtures/sample_atom.xml diff --git a/fixtures/sample_atom.xml b/fixtures/sample_atom.xml new file mode 100644 index 0000000..56b7000 --- /dev/null +++ 
b/fixtures/sample_atom.xml @@ -0,0 +1,39 @@ + + + + Example Feed + A subtitle. + + + 2003-12-13T18:30:02Z + + John Doe + johndoe@example.com + + urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6 + + + + 10 + + + Atom-Powered Robots Run Amok + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + + + {"firstName"="John","lastName"="Doe","id"="10"} + + text/json + + Atom-Powered Robots Run Amok + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + diff --git a/src/filter/convert.rs b/src/filter/convert.rs index dbf6015..7de39fa 100644 --- a/src/filter/convert.rs +++ b/src/filter/convert.rs @@ -101,4 +101,30 @@ mod tests { let feed_2 = fetch_endpoint(config_2, "").await; assert_eq!(feed_1, feed_2); } + + #[tokio::test] + async fn test_atom_to_rss_to_atom() { + let config_1 = r#" + !endpoint + path: /feed.xml + source: fixture:///sample_atom.xml + filters: + - convert_to: rss + - convert_to: atom + "#; + let config_2 = r#" + !endpoint + path: /feed.xml + source: fixture:///sample_atom.xml + filters: [] + "#; + + let _feed_1 = fetch_endpoint(config_1, "").await; + let _feed_2 = fetch_endpoint(config_2, "").await; + // assert_eq!(feed_1, feed_2); + + // The feeds are not equal because conversion to rss generally + // loses some information. I mainly used this test to verify that + // the conversion logic is sane for the most part. 
+ } } From 25faaf296487a77864ec319ed283e40265a7671d Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 21:59:24 +0900 Subject: [PATCH 10/11] remove debugging statement --- src/feed/conversion.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/feed/conversion.rs b/src/feed/conversion.rs index 4ac774f..fc4e5d8 100644 --- a/src/feed/conversion.rs +++ b/src/feed/conversion.rs @@ -98,7 +98,7 @@ impl From> for atom_syndication::Feed { Category, FixedDateTime, Generator, Link, Person, Text, }; - let parse_date = |s: &str| FixedDateTime::parse_from_rfc2822(dbg!(s)).ok(); + let parse_date = |s: &str| FixedDateTime::parse_from_rfc2822(s).ok(); let mut feed = Self::default(); From 9db81d84ad31a69716841da9cbd9789aaddb7867 Mon Sep 17 00:00:00 2001 From: shouya Date: Tue, 5 Mar 2024 22:01:07 +0900 Subject: [PATCH 11/11] rename fixtures/scishow.xml to a more generic name --- fixtures/{scishow.xml => youtube.xml} | 0 src/filter/js.rs | 6 +++--- src/filter/merge.rs | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) rename fixtures/{scishow.xml => youtube.xml} (100%) diff --git a/fixtures/scishow.xml b/fixtures/youtube.xml similarity index 100% rename from fixtures/scishow.xml rename to fixtures/youtube.xml diff --git a/src/filter/js.rs b/src/filter/js.rs index 0a89e8c..ac4da1e 100644 --- a/src/filter/js.rs +++ b/src/filter/js.rs @@ -167,7 +167,7 @@ mod tests { let config = r#" !endpoint path: /feed.xml - source: fixture:///scishow.xml + source: fixture:///youtube.xml filters: - js: | function modify_feed(feed) { @@ -195,7 +195,7 @@ mod tests { let config = r#" !endpoint path: /feed.xml - source: fixture:///scishow.xml + source: fixture:///youtube.xml filters: - modify_post: post.title += " (modified)"; "#; @@ -212,7 +212,7 @@ mod tests { let config = r#" !endpoint path: /feed.xml - source: fixture:///scishow.xml + source: fixture:///youtube.xml filters: - modify_feed: feed.title.value = "Modified Feed"; "#; diff --git 
a/src/filter/merge.rs b/src/filter/merge.rs index c3a6904..ba41fda 100644 --- a/src/filter/merge.rs +++ b/src/filter/merge.rs @@ -147,10 +147,10 @@ mod test { let config = r#" !endpoint path: /feed.xml - source: fixture:///scishow.xml + source: fixture:///youtube.xml filters: - merge: - source: fixture:///scishow.xml + source: fixture:///youtube.xml filters: - js: | function modify_post(feed, post) { @@ -185,11 +185,11 @@ mod test { let config = r#" !endpoint path: /feed.xml - source: fixture:///scishow.xml + source: fixture:///youtube.xml filters: - merge: - - fixture:///scishow.xml - - fixture:///scishow.xml + - fixture:///youtube.xml + - fixture:///youtube.xml "#; let mut feed = fetch_endpoint(config, "").await;