-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
florian
committed
Jun 15, 2024
1 parent
f36ea6e
commit 3ec5cc2
Showing
6 changed files
with
160 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import polars as pl | ||
|
||
|
||
def _normalized_rank(df: pl.DataFrame, rank_column: str) -> pl.Expr:
    """Build an expression that rescales a dense-rank column to the [0, 1] range."""
    max_rank = df[rank_column].max()
    # ``max()`` yields None for an empty or all-null column, and 1 when every row
    # shares the same value. In both cases ``max_rank - 1`` is unusable as a
    # denominator (TypeError or 0/0 -> NaN), so clamp it to 1: tied rows then
    # normalize to 0.0 and null ranks stay null.
    span = 1 if max_rank is None else max(max_rank - 1, 1)
    return (pl.col(rank_column) - 1) / span


def calculate_score(
    df: pl.DataFrame,
    weight_similarity: float = 0.5,
    weight_weekly_downloads: float = 0.5,
) -> pl.DataFrame:
    """
    Calculate a combined score based on similarity and weekly downloads.

    The function dense-ranks the similarity and weekly downloads (ascending, so the
    largest value gets the highest rank), normalizes these ranks to a [0, 1] scale,
    and then computes a combined score as the weighted sum of the two normalized ranks.
    The DataFrame is sorted by the combined score in descending order.

    If a column holds only one distinct value (for example a single-row frame), its
    normalized rank is 0.0 for every row instead of NaN. An empty frame is returned
    with the extra columns added and no rows.

    Args:
        df: DataFrame containing 'similarity' and 'weekly_downloads' columns.
        weight_similarity: Weight for the similarity score in the combined score
            calculation. Default is 0.5.
        weight_weekly_downloads: Weight for the weekly downloads score in the combined
            score calculation. Default is 0.5.

    Returns:
        A new DataFrame with the added columns 'rank_similarity',
        'rank_weekly_downloads', 'normalized_similarity',
        'normalized_weekly_downloads' and 'score', sorted by 'score' descending.
    """
    df = df.with_columns(
        rank_similarity=pl.col("similarity").rank("dense", descending=False),
        rank_weekly_downloads=pl.col("weekly_downloads").rank("dense", descending=False),
    )

    df = df.with_columns(
        normalized_similarity=_normalized_rank(df, "rank_similarity"),
        normalized_weekly_downloads=_normalized_rank(df, "rank_weekly_downloads"),
    )

    df = df.with_columns(
        score=weight_similarity * pl.col("normalized_similarity")
        + weight_weekly_downloads * pl.col("normalized_weekly_downloads")
    )

    return df.sort("score", descending=True)