Skip to content

Commit

Permalink
feat: similarity and difference score
Browse files Browse the repository at this point in the history
fixes #10 and fixes #11
  • Loading branch information
notalfredo committed Jan 14, 2023
1 parent 6bed1e4 commit 54b7ac3
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 0 deletions.
48 changes: 48 additions & 0 deletions crates/differ/src/hamming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,52 @@ mod tests {
assert_eq!(test_diff_4, hamming("0000", "1111"));
assert_eq!(test_diff_5, hamming("2173896", "2233796"));
}

#[test]
fn test_hamming_similarity() {
    use crate::hamming::hamming;
    use crate::DiffScoreConfig;

    // Every word used here is seven characters long, so each expected score
    // is the remaining (unpenalized) length divided by 7.
    let expected = |penalty: f32| (7.0 - penalty) / 7.0;

    // Default costs; the words differ at three positions.
    let default_costs = DiffScoreConfig::default();
    let default_score = hamming("karolin", "kathrin").similarity(&default_costs);
    assert_eq!(expected(3.0), default_score);

    // Substitutions discounted to 0.5; the words differ at three positions.
    let cheap_subs = DiffScoreConfig {
        sub_cost: 0.5,
        ..DiffScoreConfig::default()
    };
    let cheap_score = hamming("karolin", "kerstin").similarity(&cheap_subs);
    assert_eq!(expected(1.5), cheap_score);

    // Substitutions raised to 1.5; the words differ at four positions.
    let costly_subs = DiffScoreConfig {
        sub_cost: 1.5,
        ..DiffScoreConfig::default()
    };
    let costly_score = hamming("kathrin", "kerstin").similarity(&costly_subs);
    assert_eq!(expected(6.0), costly_score);
}

#[test]
fn test_hamming_difference() {
    use crate::hamming::hamming;
    use crate::DiffScoreConfig;

    // Every word used here is seven characters long. The difference score is
    // the complement of the similarity score, so the expectation mirrors
    // `Diff::difference`: 1.0 - (7 - penalty) / 7.
    let expected = |penalty: f32| 1.0 - (7.0 - penalty) / 7.0;

    // Default costs; the words differ at three positions.
    let default_costs = DiffScoreConfig::default();
    let default_score = hamming("karolin", "kathrin").difference(&default_costs);
    assert_eq!(expected(3.0), default_score);

    // Substitutions discounted to 0.5; the words differ at three positions.
    let cheap_subs = DiffScoreConfig {
        sub_cost: 0.5,
        ..DiffScoreConfig::default()
    };
    let cheap_score = hamming("karolin", "kerstin").difference(&cheap_subs);
    assert_eq!(expected(1.5), cheap_score);

    // Substitutions raised to 1.5; the words differ at four positions.
    let costly_subs = DiffScoreConfig {
        sub_cost: 1.5,
        ..DiffScoreConfig::default()
    };
    let costly_score = hamming("kathrin", "kerstin").difference(&costly_subs);
    assert_eq!(expected(6.0), costly_score);
}
}
48 changes: 48 additions & 0 deletions crates/differ/src/levenshtein.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,4 +166,52 @@ mod tests {
assert_eq!(test_diff_3, levenshtein("RESET", "SETS"));
assert_eq!(test_diff_4, levenshtein("RESET", "RESETER"));
}

#[test]
fn test_levenshtein_similarity() {
    use crate::levenshtein::levenshtein;
    use crate::DiffScoreConfig;

    // Expected score: the normalizing length minus the total edit penalty,
    // divided by that same length.
    let expected = |len: f32, penalty: f32| (len - penalty) / len;

    // Case-only substitutions discounted to 0.5; total expected penalty 1.5.
    let case_discount = DiffScoreConfig {
        lowercase_sub_cost: 0.5,
        ..DiffScoreConfig::default()
    };
    let case_score = levenshtein("Kittens", "kitten").similarity(&case_discount);
    assert_eq!(expected(7.0, 1.5), case_score);

    // Substitutions cost 2.0; total expected penalty 2.0.
    let costly_subs = DiffScoreConfig {
        sub_cost: 2.0,
        ..DiffScoreConfig::default()
    };
    let costly_score = levenshtein("cattle", "battle").similarity(&costly_subs);
    assert_eq!(expected(6.0, 2.0), costly_score);

    // Default costs; total expected penalty 3.0.
    let default_costs = DiffScoreConfig::default();
    let default_score = levenshtein("Saturday", "Sunday").similarity(&default_costs);
    assert_eq!(expected(8.0, 3.0), default_score);
}

#[test]
fn test_levenshtein_difference() {
    use crate::levenshtein::levenshtein;
    use crate::DiffScoreConfig;

    // Expected score: the complement of (length - penalty) / length.
    let expected = |len: f32, penalty: f32| 1.0 - (len - penalty) / len;

    // Case-only substitutions discounted to 0.5; total expected penalty 1.5.
    let case_discount = DiffScoreConfig {
        lowercase_sub_cost: 0.5,
        ..DiffScoreConfig::default()
    };
    let case_score = levenshtein("Kittens", "kitten").difference(&case_discount);
    assert_eq!(expected(7.0, 1.5), case_score);

    // Substitutions cost 2.0; total expected penalty 2.0.
    let costly_subs = DiffScoreConfig {
        sub_cost: 2.0,
        ..DiffScoreConfig::default()
    };
    let costly_score = levenshtein("cattle", "battle").difference(&costly_subs);
    assert_eq!(expected(6.0, 2.0), costly_score);

    // Default costs; total expected penalty 3.0.
    let default_costs = DiffScoreConfig::default();
    let default_score = levenshtein("Saturday", "Sunday").difference(&default_costs);
    assert_eq!(expected(8.0, 3.0), default_score);
}
}
49 changes: 49 additions & 0 deletions crates/differ/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,32 @@ impl StringDiffOp {
}
}

/// Per-operation penalties used when turning a `Diff` into a normalized score.
///
/// Each recorded edit operation subtracts the matching cost from the score
/// before it is normalized. `Clone` is derived so callers can duplicate a
/// configuration and vary a single cost.
#[derive(Debug, Clone, PartialEq)]
pub struct DiffScoreConfig {
    /// Penalty for a substitution whose characters differ by more than ASCII case.
    pub sub_cost: f32,
    /// Penalty for a substitution whose characters are equal ignoring ASCII case.
    pub lowercase_sub_cost: f32,
    /// Penalty for a single insertion or deletion.
    pub indel_cost: f32,
    /// Penalty for a transposition.
    pub transpose_cost: f32,
    // future properties here as needed
}

/// The result of comparing two strings: the recorded edit operations plus the
/// length used to normalize scores derived from them.
#[derive(Debug, PartialEq, Eq)]
pub struct Diff {
/// Edit operations recorded for the comparison; `distance` reports how many
/// there are.
pub ops: Box<[StringDiffOp]>,
/// Denominator used by `similarity` when normalizing the score.
/// NOTE(review): presumably the length of the longer input string; confirm
/// against the algorithms that construct a `Diff`.
pub total_len: usize,
}

impl Default for DiffScoreConfig {
    /// Builds the baseline configuration in which every edit kind costs `1.0`.
    fn default() -> Self {
        // Single unit cost shared by all operation kinds.
        const UNIT_COST: f32 = 1.0;

        Self {
            sub_cost: UNIT_COST,
            lowercase_sub_cost: UNIT_COST,
            indel_cost: UNIT_COST,
            transpose_cost: UNIT_COST,
        }
    }
}

impl Diff {
pub fn new(diffs: Vec<StringDiffOp>, total_len: usize) -> Self {
Self {
Expand All @@ -55,6 +75,35 @@ impl Diff {
/// Returns the number of edit operations recorded in this diff.
pub fn distance(&self) -> usize {
self.ops.len()
}

/// Computes a normalized similarity score for this diff.
///
/// The score starts at `total_len`, has the configured cost of every
/// recorded operation subtracted from it, and is then divided by
/// `total_len`. A diff with no operations therefore scores `1.0`. The
/// result is not clamped, so it can drop below `0.0` when the accumulated
/// costs exceed `total_len`.
///
/// An empty diff over zero-length input scores `1.0` rather than the NaN
/// that `0.0 / 0.0` would produce.
///
/// `score` supplies the per-operation costs.
pub fn similarity(&self, score: &DiffScoreConfig) -> f32 {
    // Nothing was compared and nothing differs: report "identical" instead
    // of dividing zero by zero.
    if self.total_len == 0 && self.ops.is_empty() {
        return 1.0;
    }

    let total_len = self.total_len as f32;

    // Subtract the costs one operation at a time, in order, so the
    // floating-point result is the same as a running subtraction.
    let remaining = self.ops.iter().fold(total_len, |acc, op| {
        let cost = match op.kind {
            StringDiffOpKind::Delete | StringDiffOpKind::Insert(_) => score.indel_cost,
            StringDiffOpKind::Substitute(from, to) => {
                // A substitution that only changes ASCII case has its own cost.
                if from.eq_ignore_ascii_case(&to) {
                    score.lowercase_sub_cost
                } else {
                    score.sub_cost
                }
            }
            StringDiffOpKind::Transpose => score.transpose_cost,
        };
        acc - cost
    });

    remaining / total_len
}
/// Computes a normalized difference score: `1.0` minus the value returned by
/// `similarity` for the same cost configuration.
///
/// `score` supplies the per-operation costs.
pub fn difference(&self, score: &DiffScoreConfig) -> f32 {
    // `score` is already a reference, so it is forwarded without re-borrowing.
    1.0 - self.similarity(score)
}
}

pub(crate) fn get_operation_matrix(
Expand Down

0 comments on commit 54b7ac3

Please sign in to comment.