Skip to content

Commit

Permalink
Merge pull request #867 from shawnlaffan/issue_866_mx_summary_stats
Browse files Browse the repository at this point in the history
Issue 866 mx summary stats
  • Loading branch information
shawnlaffan authored May 15, 2023
2 parents 43c1f86 + 186c0f6 commit 76fd253
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 53 deletions.
63 changes: 21 additions & 42 deletions lib/Biodiverse/Matrix.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use 5.010;
our $VERSION = '4.99_001';

use English ( -no_match_vars );
use experimental qw /refaliasing/;

use Carp;

Expand Down Expand Up @@ -390,51 +391,29 @@ sub get_max_value {
sub get_summary_stats {
my $self = shift;

my $n = $self->get_element_pair_count;
my ( $sumx, $sumx_sqr );
my @percentile_targets = qw /2.5 5 95 97.5/;
my @percentile_target_counts;
foreach my $pct (@percentile_targets) {
push @percentile_target_counts, $n * $pct / 100; # should floor it?
my %data;
\my %top_level = $self->{BYELEMENT};
foreach my $href (values %top_level) {
$data{$_}++ for values %$href;
}
my %percentile_hash;

my $count;

my $values_hash = $self->{BYVALUE};
BY_VALUE:
foreach my $value ( sort { $a <=> $b } keys %$values_hash ) {
my $hash = $values_hash->{$value};
my $sub_count = scalar keys %$hash;
$sumx += $value * $sub_count;
$sumx_sqr += ( $value**2 ) * $sub_count;
$count += $sub_count;

FIND_PCTL:
foreach my $target (@percentile_target_counts) {
last FIND_PCTL if $count < $target;
my $percentile = shift @percentile_targets;
$percentile_hash{$percentile} = $value;
shift @percentile_target_counts;
}
}

my $max = $self->get_max_value;
my $min = $self->get_min_value;

my %stats = (
MAX => $max,
MIN => $min,
MEAN => $sumx / $n,

#SD => undef,
PCT025 => ( $percentile_hash{'2.5'} // $min ),
PCT975 => ( $percentile_hash{'97.5'} // $max ),
PCT05 => ( $percentile_hash{'5'} // $min ),
PCT95 => ( $percentile_hash{'95'} // $max ),
my %r;
my $stats
= Statistics::Descriptive::PDL::SampleWeighted->new;
$stats->add_data (\%data);
%r = (
MAX => $stats->max,
MIN => $stats->min,
MEAN => $stats->mean,
SD => $stats->standard_deviation,
);
@r{qw /PCT025 PCT05 PCT95 PCT975/} = $stats->percentiles(2.5, 5, 95, 97.5);

use constant PRECISION => 10**13;
foreach my $key (keys %r) {
$r{$key} = $self->round_to_precision_aa($r{$key}, PRECISION) + 0;
}

return wantarray ? %stats : \%stats;
return wantarray ? %r : \%r;
}

# add an element pair to the object
Expand Down
32 changes: 21 additions & 11 deletions t/26-Cluster2.t
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,21 @@ sub test_phylo_rw_turnover_mx {
my $mx = $mx_arr->[0];

my %expected = (
PCT975 => 0.805970149253731,
PCT95 => 0.805970149253731,
PCT975 => 0.8059701493,
PCT95 => 0.8059701493,
MIN => 0,
PCT025 => 0,
PCT05 => 0.32,
MAX => 0.805970149253731,
MEAN => 0.162,
PCT05 => 0,
MAX => 0.8059701493,
MEAN => 0.3751810522,
SD => 0.3047706206,
);

my $stats = $mx->get_summary_stats;
# avoid fp precision issues in the comparison
foreach my $k (keys %$stats) {
$stats->{$k} = $mx->round_to_precision_aa ($stats->{$k}, 10**10);
}
is ($stats, \%expected, 'got expected stats for phylo_rw_turnover mx');
}

Expand Down Expand Up @@ -192,17 +197,22 @@ sub test_rw_turnover_mx {
my $mx = $mx_arr->[0];

my %expected = (
PCT975 => 0.833333333333333,
PCT95 => 0.833333333333333,
PCT975 => 0.8333333333,
PCT95 => 0.8333333333,
MIN => 0,
PCT025 => 0,
PCT05 => 0.33,
MAX => 0.833333333333333,
MEAN => 0.167333333333333,
PCT05 => 0,
MAX => 0.8333333333,
MEAN => 0.3923076923,
SD => 0.316227766,
);

my $stats = $mx->get_summary_stats;
is ($stats, \%expected, 'got expected stats for phylo_rw_turnover mx');
# avoid fp precision issues in the comparison
foreach my $k (keys %$stats) {
$stats->{$k} = $mx->round_to_precision_aa ($stats->{$k}, 10**10);
}
is ($stats, \%expected, 'got expected stats for rw_turnover mx');
}

1;
Expand Down

0 comments on commit 76fd253

Please sign in to comment.