diff --git a/src/db/feed_items.rs b/src/db/feed_items.rs index f263b805..62f89389 100644 --- a/src/db/feed_items.rs +++ b/src/db/feed_items.rs @@ -20,6 +20,72 @@ pub struct NewFeedItem { pub content_hash: String, } +pub trait ContentHashable { + fn content_fields(&self, feed: &Feed) -> Vec; + + fn hash(&self, feed: &Feed) -> String { + let content_string = self.content_fields(feed).join(""); + + let mut hasher = Sha256::new(); + hasher.update(content_string.as_bytes()); + + let result = hasher.finalize(); + hex::encode(result) + } + + fn content_field_names(&self, feed: &Feed) -> Vec { + feed.content_fields + .clone() + .unwrap_or_else(|| vec!["link".to_string(), "title".to_string()]) + } +} + +impl ContentHashable for FetchedFeedItem { + fn content_fields(&self, feed: &Feed) -> Vec { + let mut content_values = vec![]; + + for field in self.content_field_names(feed) { + match field.as_str() { + "link" => content_values.push(self.link.clone()), + "title" => content_values.push(self.title.clone()), + + "publication_date" => content_values.push(self.publication_date.to_string()), + "guid" => content_values.push(self.guid.clone().unwrap_or("".to_string())), + "description" => { + content_values.push(self.description.clone().unwrap_or("".to_string())) + } + "author" => content_values.push(self.author.clone().unwrap_or("".to_string())), + &_ => (), + } + } + + content_values + } +} + +impl ContentHashable for FeedItem { + fn content_fields(&self, feed: &Feed) -> Vec { + let mut content_values = vec![]; + + for field in self.content_field_names(feed) { + match field.as_str() { + "link" => content_values.push(self.link.clone()), + "title" => content_values.push(self.title.clone()), + + "publication_date" => content_values.push(self.publication_date.to_string()), + "guid" => content_values.push(self.guid.clone().unwrap_or("".to_string())), + "description" => { + content_values.push(self.description.clone().unwrap_or("".to_string())) + } + "author" => content_values.push(self.author.clone().unwrap_or("".to_string())), + &_ => (), + } + } + + content_values + } +} + pub fn create( conn: &mut PgConnection, feed: &Feed, @@ -28,7 +94,7 @@ pub fn create( let new_feed_items = fetched_items .into_iter() .map(|fetched_feed_item| { - let hash = calculate_content_hash(feed, &fetched_feed_item); + let hash = fetched_feed_item.hash(feed); NewFeedItem { feed_id: feed.id, @@ -106,43 +172,6 @@ pub fn get_latest_item(conn: &mut PgConnection, feed_id: i64) -> Option String { - let mut content_hash: String = "".to_string(); - let content_fields = feed - .content_fields - .clone() - .unwrap_or_else(|| vec!["link".to_string(), "title".to_string()]); - - for field in content_fields { - match field.as_str() { - "link" => content_hash.push_str(&fetched_feed_item.link), - "title" => content_hash.push_str(&fetched_feed_item.title), - "publication_date" => { - content_hash.push_str(&fetched_feed_item.publication_date.to_string()) - } - "guid" => { - content_hash.push_str(fetched_feed_item.guid.as_ref().unwrap_or(&"".to_string())) - } - "description" => content_hash.push_str( - fetched_feed_item - .description - .as_ref() - .unwrap_or(&"".to_string()), - ), - "author" => { - content_hash.push_str(fetched_feed_item.author.as_ref().unwrap_or(&"".to_string())) - } - &_ => (), - } - } - - let mut hasher = Sha256::new(); - hasher.update(content_hash.as_bytes()); - - let result = hasher.finalize(); - hex::encode(result) -} - #[cfg(test)] mod tests { use crate::db; diff --git a/src/sync/sync_feed_job.rs b/src/sync/sync_feed_job.rs index 6a9ed444..21281555 100644 --- a/src/sync/sync_feed_job.rs +++ b/src/sync/sync_feed_job.rs @@ -1,7 +1,10 @@ use crate::bot::telegram_client; use crate::bot::SimpleMessageParams; use crate::db; -use crate::db::{feed_items, feeds, telegram}; +use crate::db::feed_items; +use crate::db::feed_items::ContentHashable; +use crate::db::feeds; +use crate::db::telegram; use crate::models::feed::Feed; use crate::sync::reader::atom::AtomReader; use crate::sync::reader::json::JsonReader; @@ -146,7 +149,8 @@ impl SyncFeedJob { } Some(last_item_in_db) => { if last_fetched_item.publication_date >= last_item_in_db.publication_date - && last_fetched_item.link != last_item_in_db.link + && last_fetched_item.content_fields(&feed) + != last_item_in_db.content_fields(&feed) { self.create_feed_items(db_connection, feed, fetched_feed)?; } else {