Skip to content

Commit

Permalink
#324 run() now calls state space model prediction rather than ARIMA…
Browse files Browse the repository at this point in the history
… et al

* No longer returns test values for CSV recording
  • Loading branch information
IanGrimstead authored and IanGrimstead committed Sep 19, 2019
1 parent f68f834 commit d2c39da
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 43 deletions.
28 changes: 14 additions & 14 deletions pygrams.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,20 +217,20 @@ def main(supplied_args):

html_results, training_values = pipeline.run(predictors_to_run, normalized=args.normalised,
train_test=args.test, emergence=emergence)
if training_values is None:
continue
# save training_values to csv file
#
# training_values (term -> series):          csv file (one row per term):
# {'term1': [0,2,4,6], 'term2': [2,4,1,3]}   'term1', 0, 2, 4, 6
#                                            'term2', 2, 4, 1, 3
#
filename = os.path.join('outputs', 'emergence', args.outputs_name + '_' + emergence + '_time_series.csv')
with open(filename, 'w') as f:
w = csv.writer(f)
for key, values in training_values:
my_list = ["'" + str(key) + "'"] + values
w.writerow(my_list)
if training_values is not None:
# save training_values to csv file
#
# training_values (term -> series):          csv file (one row per term):
# {'term1': [0,2,4,6], 'term2': [2,4,1,3]}   'term1', 0, 2, 4, 6
#                                            'term2', 2, 4, 1, 3
#
filename = os.path.join('outputs', 'emergence',
args.outputs_name + '_' + emergence + '_time_series.csv')
with open(filename, 'w') as f:
w = csv.writer(f)
for key, values in training_values:
my_list = ["'" + str(key) + "'"] + values
w.writerow(my_list)

html_doc = f'''<!DOCTYPE html>
<html lang="en">
Expand Down
64 changes: 35 additions & 29 deletions scripts/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,22 @@

import numpy as np
from scipy.signal import savgol_filter
from tqdm import tqdm
from sklearn.linear_model import LinearRegression
from tqdm import tqdm

import scripts.data_factory as data_factory
import scripts.output_factory as output_factory
import scripts.utils.date_utils
from scripts.algorithms.code.ssm import StateSpaceModel
from scripts.algorithms.emergence import Emergence
from scripts.algorithms.predictor_factory import PredictorFactory
from scripts.documents_filter import DocumentsFilter
from scripts.filter_terms import FilterTerms
from scripts.text_processing import LemmaTokenizer, WordAnalyzer, lowercase_strip_accents_and_ownership
from scripts.tfidf_mask import TfidfMask
from scripts.tfidf_reduce import TfidfReduce
from scripts.tfidf_wrapper import tfidf_subset_from_features, tfidf_from_text
from scripts.utils import utils
from scripts.vandv.emergence_labels import map_prediction_to_emergence_label, report_predicted_emergence_labels_html
from scripts.vandv.graphs import report_prediction_as_graphs_html
from scripts.vandv.predictor import evaluate_prediction
from scripts.vandv import ssm_reporting


class Pipeline(object):
Expand Down Expand Up @@ -292,10 +289,6 @@ def __init__(self, data_filename, docs_mask_dict, pick_method='sum', ngram_range
self.__declining.reverse()
self.__stationary = [x[0] for x in utils.stationary_terms(self.__emergence_list, nterms2)]

# self.get_state_space_forecast(self.__timeseries_quarterly, self.__emergent, self.__term_ngrams)
results = self.evaluate_state_space_pred(self.__timeseries_quarterly, self.__timeseries_derivatives,
self.__emergent, self.__term_ngrams, window=20 )
print(results)

def label_prediction(self, derivatives, k=5):
sum_derivatives = sum(derivatives)
Expand Down Expand Up @@ -445,24 +438,37 @@ def run(self, predictors_to_run, emergence, normalized=False, train_test=False):
f' likely because -mpq is too large for dataset provided')
return '', None

html_results = ''

results, training_values, test_values, smoothed_training_values, smoothed_test_values = evaluate_prediction(
self.__timeseries_quarterly, self.__term_ngrams, predictors_to_run, test_terms=terms,
test_forecasts=train_test, timeseries_all=self.__number_of_patents_per_week if normalized else None,
num_prediction_periods=self.__M, smoothed_series=self.__timeseries_quarterly_smoothed)

predicted_emergence = map_prediction_to_emergence_label(results, smoothed_training_values, smoothed_test_values,
predictors_to_run, test_terms=terms)

html_results += report_predicted_emergence_labels_html(predicted_emergence)

html_results += report_prediction_as_graphs_html(results, predictors_to_run, self.__weekly_iso_dates,
test_values=test_values,
smoothed_test_values=smoothed_test_values,
test_terms=terms, training_values=training_values,
smoothed_training_values=smoothed_training_values,
normalised=normalized,
test_forecasts=train_test, lims=self.__lims)
# self.get_state_space_forecast(self.__timeseries_quarterly, self.__emergent, self.__term_ngrams)
results = self.evaluate_state_space_pred(self.__timeseries_quarterly, self.__timeseries_derivatives,
terms, self.__term_ngrams, window=20)
print(results)

return html_results, training_values.items()
html_results = ''
prediction_lengths = results.values()[0]
html_results += f'<h2>State Space Model: {emergence} terms</h2>\n'
html_results += f'<h3>Term analysis</h2>\n'
html_results += ssm_reporting.html_table(results, prediction_lengths)
html_results += f'<h3>Analysis summary</h2>\n'
html_results += ssm_reporting.summary_html_table(results, prediction_lengths)

return html_results, None

# results, training_values, test_values, smoothed_training_values, smoothed_test_values = evaluate_prediction(
# self.__timeseries_quarterly, self.__term_ngrams, predictors_to_run, test_terms=terms,
# test_forecasts=train_test, timeseries_all=self.__number_of_patents_per_week if normalized else None,
# num_prediction_periods=self.__M, smoothed_series=self.__timeseries_quarterly_smoothed)
#
# predicted_emergence = map_prediction_to_emergence_label(results, smoothed_training_values, smoothed_test_values,
# predictors_to_run, test_terms=terms)
#
# html_results += report_predicted_emergence_labels_html(predicted_emergence)
#
# html_results += report_prediction_as_graphs_html(results, predictors_to_run, self.__weekly_iso_dates,
# test_values=test_values,
# smoothed_test_values=smoothed_test_values,
# test_terms=terms, training_values=training_values,
# smoothed_training_values=smoothed_training_values,
# normalised=normalized,
# test_forecasts=train_test, lims=self.__lims)
#
# return html_results, training_values.items()

0 comments on commit d2c39da

Please sign in to comment.