Skip to content

Commit

Permalink
Scale distributions per individual chunk
Browse files Browse the repository at this point in the history
Fixes #267
  • Loading branch information
michael-nml committed Feb 23, 2024
1 parent ed391d9 commit 3ed26c8
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
8 changes: 4 additions & 4 deletions nannyml/distribution/continuous/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,11 @@ def calculate_chunk_distributions(
)
data['kde_density_local_max'] = data['kde_density'].apply(lambda x: max(x) if len(x) > 0 else 0)
data['kde_density_global_max'] = data.groupby('chunk_index')['kde_density_local_max'].max().max()
-data['kde_density_scaled'] = data[['kde_density', 'kde_density_global_max']].apply(
-lambda row: np.divide(np.array(row['kde_density']), row['kde_density_global_max']), axis=1
+data['kde_density_scaled'] = data[['kde_density', 'kde_density_local_max']].apply(
+lambda row: np.divide(np.array(row['kde_density']), row['kde_density_local_max']), axis=1
)
-data['kde_quartiles_scaled'] = data[['kde_quartiles', 'kde_density_global_max']].apply(
-lambda row: [(q[0], q[1] / row['kde_density_global_max'], q[2]) for q in row['kde_quartiles']], axis=1
+data['kde_quartiles_scaled'] = data[['kde_quartiles', 'kde_density_local_max']].apply(
+lambda row: [(q[0], q[1] / row['kde_density_local_max'], q[2]) for q in row['kde_quartiles']], axis=1
)

# TODO: Consider removing redundant columns to reduce fitted calculator memory usage
Expand Down
8 changes: 4 additions & 4 deletions nannyml/plots/components/joy_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,11 @@ def calculate_chunk_distributions(
)
data['kde_density_local_max'] = data['kde_density'].apply(lambda x: max(x) if len(x) > 0 else 0)
data['kde_density_global_max'] = data.groupby('chunk_index')['kde_density_local_max'].max().max()
-data['kde_density_scaled'] = data[['kde_density', 'kde_density_global_max']].apply(
-lambda row: np.divide(np.array(row['kde_density']), row['kde_density_global_max']), axis=1
+data['kde_density_scaled'] = data[['kde_density', 'kde_density_local_max']].apply(
+lambda row: np.divide(np.array(row['kde_density']), row['kde_density_local_max']), axis=1
)
-data['kde_quartiles_scaled'] = data[['kde_quartiles', 'kde_density_global_max']].apply(
-lambda row: [(q[0], q[1] / row['kde_density_global_max']) for q in row['kde_quartiles']], axis=1
+data['kde_quartiles_scaled'] = data[['kde_quartiles', 'kde_density_local_max']].apply(
+lambda row: [(q[0], q[1] / row['kde_density_local_max']) for q in row['kde_quartiles']], axis=1
)

return data
Expand Down

0 comments on commit 3ed26c8

Please sign in to comment.