Skip to content

Commit

Permalink
#324 create table from state space results - with tests
Browse files (browse the repository at this point in the history)
* Trimmed SD not implemented
  • Loading branch information
IanGrimstead authored and IanGrimstead committed Sep 19, 2019
1 parent 01d45a3 commit a2e0bc9
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 34 deletions.
70 changes: 44 additions & 26 deletions scripts/vandv/ssm_reporting.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
from statistics import stdev, mean

import pandas as pd
from scipy.stats import trim_mean


def __html_table_from_dataframe(df_results, terms, term_style='{:.0}'):
df_summary_table = df_results.style.hide_index()
df_summary_table = df_summary_table.set_table_styles([
def __html_table_from_dataframe(df_results, terms, term_style='{:.0f}'):
df_table = df_results.style.hide_index()
df_table = df_table.set_table_styles([
dict(selector='table', props=[('border-collapse', 'collapse')]),
dict(selector='td', props=[('border', '2px solid black'),
('text-align', 'right'),
('padding-left', '15px'),
('padding-right', '15px')])
])
for term in terms:
df_summary_table = df_summary_table.format({term: term_style})
df_summary_table = df_summary_table.highlight_max(axis=1)
df_table = df_table.format({term: term_style})
df_table = df_table.highlight_max(axis=1)

heading = '<style type="text/css">table {border-collapse: collapse;} </style>\n'
return heading + df_summary_table.render()
return heading + df_table.render()


def html_table(results, prediction_lengths):
def __create_df_from_results(prediction_lengths, results):
df_results = pd.DataFrame({'terms': list(results.keys())})
for prediction_length in prediction_lengths:
prediction_length_results = []
Expand All @@ -28,30 +30,46 @@ def html_table(results, prediction_lengths):

df_term_column = pd.DataFrame({f'{prediction_length}': prediction_length_results})
df_results = df_results.join(df_term_column)
return df_results


def html_table(results, prediction_lengths):
df_results = __create_df_from_results(prediction_lengths, results)

results_table = __html_table_from_dataframe(df_results, 'SSM')

trimmed_mean_proportion_to_cut = 0.1
return results_table


def summary_html_table(results, prediction_lengths, trimmed_proportion_to_cut=0.1):
df_results = __create_df_from_results(prediction_lengths, results)

means = {
'terms': f'<b>Mean</b>'}
for prediction_length in prediction_lengths:
means[f'{prediction_length}'] = mean(df_results[f'{prediction_length}'])
summary_df = pd.DataFrame(means, index=[0])

trimmed_means = {
'terms': f'<b>Trimmed ({trimmed_mean_proportion_to_cut * 100.0:.0f}% cut) <br/> mean</b>'}
'terms': f'<b>Trimmed ({trimmed_proportion_to_cut * 100.0:.0f}% cut) mean</b>'}
for prediction_length in prediction_lengths:
trimmed_means[f'{prediction_length}'] = trim_mean(df_results[f'{prediction_length}'],
trimmed_mean_proportion_to_cut)
# print(f'{term_name} trimmed mean={trimmed_means:.1f}')
df_results = df_results.append(trimmed_means, ignore_index=True)

# standard_deviations = {
# 'terms': f'<b>Standard deviation <br/>of {metric_name}</b>'}
# for predictor_name in predictor_names:
# predictor_display_name = predictor_name.replace('-', '<br/>')
# results_without_nan = [x for x in df_results[predictor_display_name] if not math.isnan(x)]
# standard_deviations[predictor_display_name] = stdev(results_without_nan)
# print(f'{predictor_name} {metric_name} standard deviation={standard_deviations[predictor_display_name]:.1f}')
# df_results = df_results.append(standard_deviations, ignore_index=True)

summary_df = pd.DataFrame(trimmed_means, index=[0])
summary_table = __html_table_from_dataframe(summary_df, 'SSM summary')
trimmed_proportion_to_cut)
summary_df = summary_df.append(trimmed_means, ignore_index=True)

# summary_df = summary_df.append(standard_deviations, ignore_index=True)
standard_deviations = {
'terms': f'<b>Standard deviation</b>'}
for prediction_length in prediction_lengths:
standard_deviations[f'{prediction_length}'] = stdev(df_results[f'{prediction_length}'])
summary_df = summary_df.append(standard_deviations, ignore_index=True)

# trimmed_standard_deviations = {
# 'terms': f'<b>Trimmed ({trimmed_proportion_to_cut * 100.0:.0f}% cut) standard deviation</b>'}
# for prediction_length in prediction_lengths:
# trimmed_data = trimboth(df_results[f'{prediction_length}'], trimmed_proportion_to_cut)
# trimmed_standard_deviations[f'{prediction_length}'] = stdev(trimmed_data)
# summary_df = summary_df.append(trimmed_standard_deviations, ignore_index=True)

summary_table = __html_table_from_dataframe(summary_df, 'SSM summary')

return f'<h2>State Space Model Results</h2>\n{results_table}<p/>{summary_table}\n'
return summary_table
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def setup_package():
install_requires=['matplotlib', 'numpy', 'scipy==1.2.1', 'wordcloud', 'pandas', 'tqdm', 'nltk', 'scikit-learn',
'xlrd', 'python-Levenshtein', 'gensim==3.4.0', 'statsmodels', 'keras', 'tensorflow',
'keras_tqdm', 'patsy', 'humanfriendly', 'psutil', 'jinja2', 'urllib3==1.22'],
# extras_require={'dev': ['check-manifest'],'test': ['coverage'],},
extras_require={'test': ['beautifulsoup4']},
python_requires='>=3.6',
cmdclass={
'install': CustomInstaller,
Expand Down
49 changes: 42 additions & 7 deletions tests/vandv/test_ssm_reporting.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,53 @@
import unittest

from scripts.vandv.ssm_reporting import html_table
from bs4 import BeautifulSoup

from scripts.vandv.ssm_reporting import html_table, summary_html_table


def extract_table_text_from_html(soup):
    """Return the text of the first top-level table in *soup*, normalised line by line.

    Only the direct children of *soup* are inspected. Within the table's text,
    two consecutive newlines become a single line break and every remaining
    single newline becomes a space. The result is stripped, and each line is
    then stripped of surrounding whitespace.

    Args:
        soup: Object exposing ``children``; each child must provide ``name``
            and, when the name is ``'table'``, ``text`` (the tests pass a
            BeautifulSoup document).

    Returns:
        The normalised table text, or ``None`` when no direct child is a table.
    """
    for tag in soup.children:
        if tag.name != 'table':
            continue
        # Split on blank lines directly instead of routing them through a
        # placeholder character, so text that already contains that character
        # is not mistaken for a line break.
        rows = [row.replace('\n', ' ') for row in tag.text.split('\n\n')]
        table_text = '\n'.join(rows).strip()
        return '\n'.join(line.strip() for line in table_text.split('\n'))
    return None


class SSMReporting(unittest.TestCase):
def test_html_table(self):
results = {
'sample term': {2: 78, 3: 60}
'sample term': {2: 78, 3: 60},
'extra term': {2: 93, 3: 87}
}
expected_text = '''terms 2 3
sample term 78 60
extra term 93 87'''

output_html = html_table(results, [2, 3])

self.assertEqual(
'''
'''
, output_html
)
soup = BeautifulSoup(output_html, 'html.parser')

actual_text = extract_table_text_from_html(soup)
self.assertEqual(expected_text, actual_text)

def test_summary_html_table(self):
results = {
'sample term': {2: 80, 3: 60},
'extra term': {2: 90, 3: 60},
'third term': {2: 100, 3: 60},
'fourth term': {2: 1000, 3: -200},
'fifth term': {2: -2000, 3: 600}
}
expected_text = '''terms 2 3
Mean -146 116
Trimmed (20% cut) mean 90 60
Standard deviation 1108.82 293.053'''
# Trimmed (10% cut) standard deviation 455.073 0'''

output_html = summary_html_table(results, [2, 3], trimmed_proportion_to_cut=0.2)

soup = BeautifulSoup(output_html, 'html.parser')

actual_text = extract_table_text_from_html(soup)
self.assertEqual(expected_text, actual_text)

0 comments on commit a2e0bc9

Please sign in to comment.