Skip to content

Commit

Permalink
Ensure we always trim our calculation results down to 1.0 instead of…
Browse files Browse the repository at this point in the history
… the threshold value. After running the calculation, remove any results that are above our threshold. Use a higher level of precision when converting our decimals to percentages so we can more easily see the differences between values.
  • Loading branch information
dkotter committed Dec 7, 2023
1 parent c79ec83 commit ae66799
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 21 deletions.
19 changes: 3 additions & 16 deletions includes/Classifai/Providers/OpenAI/EmbeddingCalculations.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@ class EmbeddingCalculations {
*
* @param array $source_embedding Embedding data of the source item.
* @param array $compare_embedding Embedding data of the item to compare.
* @param float $threshold The threshold to use for the similarity calculation.
*
* @return bool|float
*/
public function similarity( array $source_embedding = [], array $compare_embedding = [], $threshold = 1 ) {
public function similarity( array $source_embedding = [], array $compare_embedding = [] ) {
if ( empty( $source_embedding ) || empty( $compare_embedding ) ) {
return false;
}
Expand Down Expand Up @@ -58,20 +57,8 @@ function( $x ) {
// Do the math.
$distance = 1.0 - ( $combined_average / sqrt( $source_average * $compare_average ) );

/**
* Filter the threshold for the similarity calculation.
*
* @since 2.5.0
* @hook classifai_threshold
*
* @param {float} $threshold The threshold to use.
*
* @return {float} The threshold to use.
*/
$threshold = apply_filters( 'classifai_threshold', $threshold );

// Ensure we are within the range of 0 to 1.0 (i.e. $threshold).
return max( 0, min( abs( (float) $distance ), $threshold ) );
// Ensure we are within the range of 0 to 1.0.
return max( 0, min( abs( (float) $distance ), 1.0 ) );
}

}
10 changes: 5 additions & 5 deletions includes/Classifai/Providers/OpenAI/Embeddings.php
Original file line number Diff line number Diff line change
Expand Up @@ -604,14 +604,14 @@ private function get_terms( array $embedding = [] ) {

$term_added = 0;
foreach ( $terms as $term_id => $similarity ) {
// Convert $similarity to percentage.
$similarity = round( ( 1 - $similarity ), 2 );

// Stop if we have added the number of terms specified in settings.
if ( $number_to_add <= $term_added ) {
break;
}

// Convert $similarity to percentage.
$similarity = round( ( 1 - $similarity ), 10 );

$result[ $index ]->{$tax_name}[] = [// phpcs:ignore Squiz.PHP.DisallowMultipleAssignments.Found
'label' => get_term( $term_id )->name,
'score' => $similarity,
Expand Down Expand Up @@ -671,8 +671,8 @@ private function get_embeddings_similarity( $embedding ) {
$term_embedding = get_term_meta( $term_id, 'classifai_openai_embeddings', true );

if ( $term_embedding ) {
$similarity = $calculations->similarity( $embedding, $term_embedding, $threshold );
if ( false !== $similarity ) {
$similarity = $calculations->similarity( $embedding, $term_embedding );
if ( false !== $similarity || $similarity <= $threshold ) {
$embedding_similarity[ $tax ][ $term_id ] = $similarity;
}
}
Expand Down

0 comments on commit ae66799

Please sign in to comment.