Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compare content fields when synchronizing #312

Merged
merged 1 commit into from
Feb 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 67 additions & 38 deletions src/db/feed_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,72 @@ pub struct NewFeedItem {
pub content_hash: String,
}

/// Items whose identity can be reduced to a list of "content field" values
/// and, from those, to a content hash.
pub trait ContentHashable {
    /// Returns the values of the item's content fields for the given feed.
    fn content_fields(&self, feed: &Feed) -> Vec<String>;

    /// Returns the hex-encoded SHA-256 digest of the item's content fields.
    ///
    /// The values from [`content_fields`](Self::content_fields) are
    /// concatenated in order, with no separator, before being hashed.
    fn hash(&self, feed: &Feed) -> String {
        let hash_input: String = self.content_fields(feed).concat();

        let mut digest = Sha256::new();
        digest.update(hash_input.as_bytes());

        hex::encode(digest.finalize())
    }

    /// Returns the names of the fields that make up the item's content.
    ///
    /// Uses the feed's own `content_fields` when set; otherwise falls back
    /// to `link` and `title`.
    fn content_field_names(&self, feed: &Feed) -> Vec<String> {
        match &feed.content_fields {
            Some(names) => names.clone(),
            None => vec![String::from("link"), String::from("title")],
        }
    }
}

impl ContentHashable for FetchedFeedItem {
    /// Collects this fetched item's values for each configured content field.
    ///
    /// Values are returned in the order of the field names. Unrecognized
    /// field names are skipped, and an absent `guid`, `description` or
    /// `author` contributes an empty string.
    fn content_fields(&self, feed: &Feed) -> Vec<String> {
        self.content_field_names(feed)
            .iter()
            .filter_map(|name| match name.as_str() {
                "link" => Some(self.link.clone()),
                "title" => Some(self.title.clone()),
                "publication_date" => Some(self.publication_date.to_string()),
                "guid" => Some(self.guid.clone().unwrap_or_default()),
                "description" => Some(self.description.clone().unwrap_or_default()),
                "author" => Some(self.author.clone().unwrap_or_default()),
                // Unknown field names contribute nothing.
                _ => None,
            })
            .collect()
    }
}

impl ContentHashable for FeedItem {
    /// Collects this stored item's values for each configured content field.
    ///
    /// Values are returned in the order of the field names. Unrecognized
    /// field names are skipped, and an absent `guid`, `description` or
    /// `author` contributes an empty string.
    fn content_fields(&self, feed: &Feed) -> Vec<String> {
        self.content_field_names(feed)
            .iter()
            .filter_map(|name| match name.as_str() {
                "link" => Some(self.link.clone()),
                "title" => Some(self.title.clone()),
                "publication_date" => Some(self.publication_date.to_string()),
                "guid" => Some(self.guid.clone().unwrap_or_default()),
                "description" => Some(self.description.clone().unwrap_or_default()),
                "author" => Some(self.author.clone().unwrap_or_default()),
                // Unknown field names contribute nothing.
                _ => None,
            })
            .collect()
    }
}

pub fn create(
conn: &mut PgConnection,
feed: &Feed,
Expand All @@ -28,7 +94,7 @@ pub fn create(
let new_feed_items = fetched_items
.into_iter()
.map(|fetched_feed_item| {
let hash = calculate_content_hash(feed, &fetched_feed_item);
let hash = fetched_feed_item.hash(feed);

NewFeedItem {
feed_id: feed.id,
Expand Down Expand Up @@ -106,43 +172,6 @@ pub fn get_latest_item(conn: &mut PgConnection, feed_id: i64) -> Option<FeedItem
}
}

/// Computes the hex-encoded SHA-256 content hash of a fetched feed item.
///
/// The hash input is the concatenation, without separators, of the item's
/// values for each name in `feed.content_fields` (defaulting to `link` and
/// `title` when the feed has none configured). Unrecognized field names are
/// ignored, and an absent `guid`, `description` or `author` contributes an
/// empty string.
pub fn calculate_content_hash(feed: &Feed, fetched_feed_item: &FetchedFeedItem) -> String {
    let field_names = feed
        .content_fields
        .clone()
        .unwrap_or_else(|| vec!["link".to_string(), "title".to_string()]);

    let mut hash_input = String::new();

    for name in &field_names {
        match name.as_str() {
            "link" => hash_input.push_str(&fetched_feed_item.link),
            "title" => hash_input.push_str(&fetched_feed_item.title),
            "publication_date" => {
                let published = fetched_feed_item.publication_date.to_string();
                hash_input.push_str(&published);
            }
            "guid" => hash_input.push_str(fetched_feed_item.guid.as_deref().unwrap_or("")),
            "description" => {
                hash_input.push_str(fetched_feed_item.description.as_deref().unwrap_or(""))
            }
            "author" => hash_input.push_str(fetched_feed_item.author.as_deref().unwrap_or("")),
            // Unknown field names contribute nothing to the hash input.
            _ => {}
        }
    }

    let mut digest = Sha256::new();
    digest.update(hash_input.as_bytes());

    hex::encode(digest.finalize())
}

#[cfg(test)]
mod tests {
use crate::db;
Expand Down
8 changes: 6 additions & 2 deletions src/sync/sync_feed_job.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use crate::bot::telegram_client;
use crate::bot::SimpleMessageParams;
use crate::db;
use crate::db::{feed_items, feeds, telegram};
use crate::db::feed_items;
use crate::db::feed_items::ContentHashable;
use crate::db::feeds;
use crate::db::telegram;
use crate::models::feed::Feed;
use crate::sync::reader::atom::AtomReader;
use crate::sync::reader::json::JsonReader;
Expand Down Expand Up @@ -146,7 +149,8 @@ impl SyncFeedJob {
}
Some(last_item_in_db) => {
if last_fetched_item.publication_date >= last_item_in_db.publication_date
&& last_fetched_item.link != last_item_in_db.link
&& last_fetched_item.content_fields(&feed)
!= last_item_in_db.content_fields(&feed)
{
self.create_feed_items(db_connection, feed, fetched_feed)?;
} else {
Expand Down