Skip to content

Commit

Permalink
Compare content fields when synchronizing (#312)
Browse files Browse the repository at this point in the history
Currently, only the link of a feed item is used
  • Loading branch information
ayrat555 authored Feb 25, 2023
1 parent 4856f21 commit cebc1f4
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 40 deletions.
105 changes: 67 additions & 38 deletions src/db/feed_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,72 @@ pub struct NewFeedItem {
pub content_hash: String,
}

/// Items that can be reduced to a digest over a feed-configured set of fields.
pub trait ContentHashable {
    /// Returns the values of the content fields selected for `feed`, in the
    /// order given by [`ContentHashable::content_field_names`].
    fn content_fields(&self, feed: &Feed) -> Vec<String>;

    /// Returns the hex-encoded SHA-256 digest of the content field values.
    ///
    /// The values are concatenated without any separator before hashing.
    fn hash(&self, feed: &Feed) -> String {
        let joined = self.content_fields(feed).concat();

        let mut sha = Sha256::new();
        sha.update(joined.as_bytes());

        hex::encode(sha.finalize())
    }

    /// Returns the names of the fields that make up the content of an item.
    ///
    /// Falls back to `link` and `title` when the feed does not configure any.
    fn content_field_names(&self, feed: &Feed) -> Vec<String> {
        match &feed.content_fields {
            Some(configured) => configured.clone(),
            None => vec!["link".to_string(), "title".to_string()],
        }
    }
}

impl ContentHashable for FetchedFeedItem {
    /// Collects the value of every configured content field of this fetched item.
    ///
    /// Unknown field names are ignored; absent optional values contribute an
    /// empty string.
    fn content_fields(&self, feed: &Feed) -> Vec<String> {
        self.content_field_names(feed)
            .iter()
            .filter_map(|name| {
                let value = match name.as_str() {
                    "link" => self.link.clone(),
                    "title" => self.title.clone(),
                    "publication_date" => self.publication_date.to_string(),
                    "guid" => self.guid.clone().unwrap_or_default(),
                    "description" => self.description.clone().unwrap_or_default(),
                    "author" => self.author.clone().unwrap_or_default(),
                    // Names that are not recognised contribute nothing.
                    _ => return None,
                };

                Some(value)
            })
            .collect()
    }
}

impl ContentHashable for FeedItem {
    /// Collects the value of every configured content field of this stored item.
    ///
    /// Unknown field names are ignored; absent optional values contribute an
    /// empty string.
    fn content_fields(&self, feed: &Feed) -> Vec<String> {
        // Maps a field name to its value, or `None` for an unrecognised name.
        let value_of = |name: &str| -> Option<String> {
            match name {
                "link" => Some(self.link.clone()),
                "title" => Some(self.title.clone()),
                "publication_date" => Some(self.publication_date.to_string()),
                "guid" => Some(self.guid.clone().unwrap_or_default()),
                "description" => Some(self.description.clone().unwrap_or_default()),
                "author" => Some(self.author.clone().unwrap_or_default()),
                _ => None,
            }
        };

        self.content_field_names(feed)
            .iter()
            .flat_map(|name| value_of(name.as_str()))
            .collect()
    }
}

pub fn create(
conn: &mut PgConnection,
feed: &Feed,
Expand All @@ -28,7 +94,7 @@ pub fn create(
let new_feed_items = fetched_items
.into_iter()
.map(|fetched_feed_item| {
let hash = calculate_content_hash(feed, &fetched_feed_item);
let hash = fetched_feed_item.hash(feed);

NewFeedItem {
feed_id: feed.id,
Expand Down Expand Up @@ -106,43 +172,6 @@ pub fn get_latest_item(conn: &mut PgConnection, feed_id: i64) -> Option<FeedItem
}
}

/// Computes the hex-encoded SHA-256 digest of a fetched item's content fields.
///
/// The fields come from `feed.content_fields`, defaulting to `link` and
/// `title` when it is not set. Their values are concatenated without a
/// separator; unknown field names are ignored and absent optional values
/// contribute an empty string.
pub fn calculate_content_hash(feed: &Feed, fetched_feed_item: &FetchedFeedItem) -> String {
    let field_names = feed
        .content_fields
        .clone()
        .unwrap_or_else(|| vec!["link".to_string(), "title".to_string()]);

    let item = fetched_feed_item;
    let mut buffer = String::new();

    for name in &field_names {
        match name.as_str() {
            "link" => buffer += &item.link,
            "title" => buffer += &item.title,
            "publication_date" => buffer += &item.publication_date.to_string(),
            "guid" => buffer += item.guid.as_deref().unwrap_or(""),
            "description" => buffer += item.description.as_deref().unwrap_or(""),
            "author" => buffer += item.author.as_deref().unwrap_or(""),
            _ => {}
        }
    }

    let mut sha = Sha256::new();
    sha.update(buffer.as_bytes());

    hex::encode(sha.finalize())
}

#[cfg(test)]
mod tests {
use crate::db;
Expand Down
8 changes: 6 additions & 2 deletions src/sync/sync_feed_job.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use crate::bot::telegram_client;
use crate::bot::SimpleMessageParams;
use crate::db;
use crate::db::{feed_items, feeds, telegram};
use crate::db::feed_items;
use crate::db::feed_items::ContentHashable;
use crate::db::feeds;
use crate::db::telegram;
use crate::models::feed::Feed;
use crate::sync::reader::atom::AtomReader;
use crate::sync::reader::json::JsonReader;
Expand Down Expand Up @@ -146,7 +149,8 @@ impl SyncFeedJob {
}
Some(last_item_in_db) => {
if last_fetched_item.publication_date >= last_item_in_db.publication_date
&& last_fetched_item.link != last_item_in_db.link
&& last_fetched_item.content_fields(&feed)
!= last_item_in_db.content_fields(&feed)
{
self.create_feed_items(db_connection, feed, fetched_feed)?;
} else {
Expand Down

0 comments on commit cebc1f4

Please sign in to comment.