Skip to content

Commit

Permalink
Adjust the dimension filter and add minimal segment size
Browse files (browse the repository at this point in the history)
  • Loading branch information
dyang415 committed Sep 29, 2023
1 parent 6f47a96 commit 06550e1
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions backend/app/insight/services/insight_builders.py
Original file line number | Diff line number | Diff line change
@@ -252,8 +252,9 @@ def gen_sub_df_for_columns(columns: List[str]):
) for columns in column_combinations_list]
wait(futures)

- multi_dimension_grouping_result = polars.concat(
- [future.result() for future in futures])
+ total_rows = self.overall_aggregated_df['count_baseline'].sum() + self.overall_aggregated_df['count'].sum()
+ multi_dimension_grouping_result = polars.concat([future.result() for future in futures]) \
+ .filter((polars.col("count") + polars.col("count_baseline")) / polars.lit(total_rows) > 0.01)

dimension_info_df = multi_dimension_grouping_result.filter(polars.col("dimension_name").list.lengths() == 1) \
.with_columns(polars.col("dimension_name").list.first()) \
@@ -262,7 +263,7 @@ def gen_sub_df_for_columns(columns: List[str]):
.select('dimension_name', "score") \
.with_columns([polars.col("score").mean().alias("score_mean"),
polars.col("score").std().alias("score_std")])
- dimensions = [Dimension(row['dimension_name'], row['score'], row['score'] > row['score_mean'] and row['score'] > 0.02) for row in
+ dimensions = [Dimension(row['dimension_name'], row['score'], row['score'] > row['score_mean'] or row['score'] > 0.02) for row in
dimension_info_df.rows(named=True)]

weighted_change_mean = multi_dimension_grouping_result.select(
Expand Down

0 comments on commit 06550e1

Please sign in to comment.