update search engine
CatAnnaDev committed Jun 28, 2024
1 parent 8103d73 commit 65cf909
Showing 4 changed files with 44 additions and 14 deletions.
11 changes: 5 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default.

7 changes: 3 additions & 4 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "neko_dl"
version = "0.14.1"
version = "0.14.2"
edition = "2021"
homepage = "https://github.com/CatAnnaDev/neko_sama_downloader"
repository = "https://github.com/CatAnnaDev/neko_sama_downloader"
@@ -35,12 +35,11 @@ crossbeam-channel = "0.5.13"
serde_derive = "1.0.203"
crossbeam = "0.8.4"
regex = "1.10.5"
quick-xml = "0.33.0"
quick-xml = "0.34.0"
tokio = { version = "1.38.0", features = ["full"] }
thirtyfour = "0.33.0-alpha.2"
ctrlc = "3.4.4"
strsim = "0.11.1"
serde_json = "1.0.117"
serde_json = "1.0.118"
serde = "1.0.203"
requestty = "0.5.0"
m3u8-rs = "6.0.0"
1 change: 1 addition & 0 deletions src/main.rs
@@ -195,6 +195,7 @@ async fn setup_search_or_download(new_args: &mut Args)
Some(vec![ProcessingUrl {
name: "".to_string(),
ep: "".to_string(),
_description: "".to_string(),
url: url.to_string(),
genre: "".to_string(),
}])
39 changes: 35 additions & 4 deletions src/search_engine/search.rs
@@ -10,6 +10,7 @@ use crate::web_client::web;
pub struct ProcessingUrl {
pub name: String,
pub ep: String,
+pub _description: String,
pub url: String,
pub genre: String,
}
@@ -42,20 +43,23 @@ pub async fn search_over_json(

for x in v {
let cleaned_title = clean_string(&x.title);
+let cleaned_description = clean_string(&x.others);

-let levenshtein_distance = strsim::levenshtein(&cleaned_name, &cleaned_title) as f64;
+let levenshtein_distance = levenshtein_distance(&cleaned_name, &cleaned_title) as f64;
let max_length = cleaned_name.len().max(cleaned_title.len()) as f64;
let levenshtein_similarity = 1.0 - levenshtein_distance / max_length;

-if jaccard_similarity(&cleaned_name, &cleaned_title) > 0.8
-|| levenshtein_similarity > 0.8
+if jaccard_similarity(&cleaned_name, &cleaned_title) > 0.7
+|| levenshtein_similarity > 0.7
|| cleaned_title.contains(&cleaned_name)
+|| cleaned_description.contains(&cleaned_name)
{
let x = ProcessingUrl {
name: x.title,
ep: x.nb_eps,
+_description: x.others,
url: x.url,
genre: x.genres.join(", ").replace("c0m1dy", "comedy"),
genre: x.genres.join(", "),
};
if *debug {
debug!("Search engine {:#?}", x);
@@ -77,6 +81,33 @@ fn clean_string(s: &str) -> String {
.to_lowercase()
}

+fn levenshtein_distance(word1: &str, word2: &str) -> usize {
+    let w1 = word1.chars().collect::<Vec<_>>();
+    let w2 = word2.chars().collect::<Vec<_>>();
+
+    let word1_length = w1.len() + 1;
+    let word2_length = w2.len() + 1;
+
+    let mut matrix = vec![vec![0; word1_length]; word2_length];
+
+    for i in 1..word1_length { matrix[0][i] = i; }
+    for j in 1..word2_length { matrix[j][0] = j; }
+
+    for j in 1..word2_length {
+        for i in 1..word1_length {
+            let x: usize = if w1[i-1] == w2[j-1] {
+                matrix[j-1][i-1]
+            } else {
+                1 + std::cmp::min(
+                    std::cmp::min(matrix[j][i-1], matrix[j-1][i]),
+                    matrix[j-1][i-1])
+            };
+            matrix[j][i] = x;
+        }
+    }
+    matrix[word2_length-1][word1_length-1]
+}

fn jaccard_similarity(s1: &str, s2: &str) -> f64 {
let set1: std::collections::HashSet<_> = s1.chars().collect();
let set2: std::collections::HashSet<_> = s2.chars().collect();
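Only the two HashSet lines of jaccard_similarity fall inside the hunk above; the rest of its body is not part of this diff. A plausible reading, assumed here rather than taken from the commit, is character-set intersection over union:

use std::collections::HashSet;

// Assumption: intersection-over-union of the two character sets. The
// committed body past the two HashSet lines is outside the diff and may
// differ from this sketch.
fn jaccard_similarity(s1: &str, s2: &str) -> f64 {
    let set1: HashSet<char> = s1.chars().collect();
    let set2: HashSet<char> = s2.chars().collect();
    let intersection = set1.intersection(&set2).count() as f64;
    let union = set1.union(&set2).count() as f64;
    if union == 0.0 { 0.0 } else { intersection / union }
}

fn main() {
    // Character-level Jaccard ignores order and repetition, so it is a
    // deliberately loose signal: anagrams score a perfect 1.0.
    assert_eq!(jaccard_similarity("abc", "cba"), 1.0);
    // {o, r, u, t} shared out of {n, a, r, u, t, o, b} total = 4/7 ≈ 0.57,
    // which stays below the new 0.7 threshold.
    println!("{:.2}", jaccard_similarity("naruto", "boruto"));
}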

