Skip to content

Commit

Permalink
#324 Trimmed SD implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
IanGrimstead authored and IanGrimstead committed Sep 19, 2019
1 parent a2e0bc9 commit a1aef5a
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
28 changes: 22 additions & 6 deletions scripts/vandv/ssm_reporting.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from statistics import stdev, mean

import numpy as np
import pandas as pd
from scipy.stats import trim_mean

Expand Down Expand Up @@ -41,6 +42,21 @@ def html_table(results, prediction_lengths):
return results_table


def trim_proportion(data, proportion_to_cut):
    """Return ``data`` with a proportion of its extreme observations removed.

    The same number of observations, ``int(proportion_to_cut * nobs)``, is
    dropped from the low end and from the high end along the first axis.
    The slicing logic is adapted from ``scipy.stats.trim_mean``.

    Args:
        data: Array-like of observations (list, tuple, NumPy array, pandas
            Series, ...). Observations are counted along axis 0.
        proportion_to_cut: Fraction of observations to remove from *each*
            end of the data.

    Returns:
        A NumPy array holding the observations that survive the trim. The
        values are partitioned, not fully sorted, so their order within the
        result is unspecified. The result is empty when the input is empty
        or when the cut removes every observation.

    Raises:
        ValueError: If the proportion yields a negative cut count, or if it
            is so large that the two cuts would overlap.
    """
    # Coerce first so plain sequences (which have no ``.shape``) are accepted.
    data = np.asanyarray(data)
    nobs = data.shape[0]
    lower_cut = int(proportion_to_cut * nobs)
    upper_cut = nobs - lower_cut

    if lower_cut < 0:
        # Previously this only surfaced as an obscure "kth out of bounds"
        # error from np.partition; fail with a clear message instead.
        raise ValueError("Proportion must not be negative.")
    if lower_cut > upper_cut:
        raise ValueError("Proportion too big.")
    if lower_cut == upper_cut:
        # Nothing survives the trim (this includes empty input). Skip the
        # partition step, which cannot accept a kth index on an empty axis.
        return data[lower_cut:upper_cut].copy()

    # Partitioning on both boundary indices guarantees that everything
    # before ``lower_cut`` is <= the kept values and everything from
    # ``upper_cut`` onwards is >= them, without the cost of a full sort.
    partitioned = np.partition(data, (lower_cut, upper_cut - 1), 0)
    return partitioned[lower_cut:upper_cut]


def summary_html_table(results, prediction_lengths, trimmed_proportion_to_cut=0.1):
df_results = __create_df_from_results(prediction_lengths, results)

Expand All @@ -63,12 +79,12 @@ def summary_html_table(results, prediction_lengths, trimmed_proportion_to_cut=0.
standard_deviations[f'{prediction_length}'] = stdev(df_results[f'{prediction_length}'])
summary_df = summary_df.append(standard_deviations, ignore_index=True)

# trimmed_standard_deviations = {
# 'terms': f'<b>Trimmed ({trimmed_proportion_to_cut * 100.0:.0f}% cut) standard deviation</b>'}
# for prediction_length in prediction_lengths:
# trimmed_data = trimboth(df_results[f'{prediction_length}'], trimmed_proportion_to_cut)
# trimmed_standard_deviations[f'{prediction_length}'] = stdev(trimmed_data)
# summary_df = summary_df.append(trimmed_standard_deviations, ignore_index=True)
trimmed_standard_deviations = {
'terms': f'<b>Trimmed ({trimmed_proportion_to_cut * 100.0:.0f}% cut) standard deviation</b>'}
for prediction_length in prediction_lengths:
trimmed_data = trim_proportion(df_results[f'{prediction_length}'], trimmed_proportion_to_cut)
trimmed_standard_deviations[f'{prediction_length}'] = stdev(trimmed_data)
summary_df = summary_df.append(trimmed_standard_deviations, ignore_index=True)

summary_table = __html_table_from_dataframe(summary_df, 'SSM summary')

Expand Down
4 changes: 2 additions & 2 deletions tests/vandv/test_ssm_reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def test_summary_html_table(self):
expected_text = '''terms 2 3
Mean -146 116
Trimmed (20% cut) mean 90 60
Standard deviation 1108.82 293.053'''
# Trimmed (10% cut) standard deviation 455.073 0'''
Standard deviation 1108.82 293.053
Trimmed (20% cut) standard deviation 10 0'''

output_html = summary_html_table(results, [2, 3], trimmed_proportion_to_cut=0.2)

Expand Down

0 comments on commit a1aef5a

Please sign in to comment.