diff --git a/engines/BBDD.py b/engines/BBDD.py index c05f52e..0b43bc7 100644 --- a/engines/BBDD.py +++ b/engines/BBDD.py @@ -96,7 +96,7 @@ def new_model(name, winner, model,params,metric): DBSession = sessionmaker(bind=engine) session = DBSession() print ("Model saved") - print(model) + #print(model) new_model = Model(TS_name=name, TS_winner_name = winner, TS_model=bytearray(model),TS_model_params= params,TS_metric=metric) session.add(new_model) diff --git a/engines/functions_timeseries.py b/engines/functions_timeseries.py index 4f59ec5..3ffc07a 100644 --- a/engines/functions_timeseries.py +++ b/engines/functions_timeseries.py @@ -3,6 +3,8 @@ from . holtwinter import anomaly_holt,forecast_holt from . auto_arima import anomaly_AutoArima from . lstm import anomaly_LSTM, anomaly_uni_LSTM +from . tbats import anomaly_uni_TBATS + import traceback from . BBDD import new_model, get_best_model @@ -51,16 +53,14 @@ def model_univariate(lista_datos,num_fut,desv_mse,train,name): print(e) print ('ERROR: exception executing LSTM univariate') - #try: - #if (len(lista_datos) > 100): - ##new_length= - #lista_datos_ari=lista_datos[len(lista_datos)-100:] - #engines_output['arima'] = anomaly_AutoArima(lista_datos_ari,num_fut,len(lista_datos),desv_mse) - #debug['arima'] = engines_output['arima']['debug'] - #except Exception as e: - #print(e) - #print ('ERROR: exception executing Autoarima') + try: + engines_output['TBATS'] = anomaly_uni_TBATS(lista_datos,num_fut,desv_mse,train,name) + debug['TBATS'] = engines_output['TBATS']['debug'] + except Exception as e: + print(e) + print ('ERROR: exception executing TBATS univariate') + try: if (train): engines_output['VAR'] = univariate_anomaly_VAR(lista_datos,num_fut,name) diff --git a/engines/helpers.py b/engines/helpers.py index ea24702..7c49e5f 100644 --- a/engines/helpers.py +++ b/engines/helpers.py @@ -1,10 +1,28 @@ from keras.models import Sequential from keras.layers.recurrent import LSTM from keras.layers.core import Dense, Activation, Dropout -import pandas as pd +import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error,mean_absolute_error - +import statsmodels.api as sm + + +def seasonal_options (a): + print(" Starting seasonal finding") + print(a) + x =sm.tsa.stattools.pacf(a) + + possible =[] + for i in range(4, len(x)-6): + before2 = x[i-2] + before= x[i-1] + period = x[i] + last = x[i+1] + last2 = x[i+2] + if (before2 < before < period > last ): + possible.append(i-1) + print ("Finishing seasonal finding") + return possible def windows(seq, num): avg = len(seq) / float(num) @@ -17,7 +35,7 @@ def windows(seq, num): return out - + def create_dataset(dataset, window_size = 1): data_X, data_Y = [], [] for i in range(len(dataset) - window_size - 1): @@ -26,25 +44,25 @@ def create_dataset(dataset, window_size = 1): data_Y.append(dataset[i + window_size, 0]) return(np.array(data_X), np.array(data_Y)) - + # Define the model. def fit_model_new(train_X, train_Y, window_size = 1): model2 = Sequential() - model2.add(LSTM(input_shape = (window_size, 1), - units = window_size, + model2.add(LSTM(input_shape = (window_size, 1), + units = window_size, return_sequences = True)) model2.add(Dropout(0.5)) model2.add(LSTM(256)) model2.add(Dropout(0.5)) model2.add(Dense(1)) model2.add(Activation("linear")) - model2.compile(loss = "mse", + model2.compile(loss = "mse", optimizer = "adam") model2.summary() # Fit the first model. - model2.fit(train_X, train_Y, epochs = 80, - batch_size = 1, + model2.fit(train_X, train_Y, epochs = 80, + batch_size = 1, verbose = 2) return(model2) @@ -60,13 +78,13 @@ def predict_and_score(model, X, Y,scaler): mae = mean_absolute_error(orig_data[0], pred[:, 0]) return(score, pred, pred_scaled,mae) - -def mean_absolute_percentage_error(y_true, y_pred): + +def mean_absolute_percentage_error(y_true, y_pred): y_true, y_pred = np.array(y_true), np.array(y_pred) return np.mean(np.abs((y_true - y_pred) / y_true)) * 100 - + def merge_two_dicts(x, y): z = x.copy() # start with x's keys and values @@ -81,8 +99,7 @@ def create_train_test(lista_puntos, lista_datos): df.set_index('puntos',inplace=True,drop=False) tam_train = int(len(df)*0.7) print (" train length" + str(tam_train)) - + df_train = df[:tam_train] df_test = df[tam_train:] return df, df_train, df_test - diff --git a/engines/holtwinter.py b/engines/holtwinter.py index a9b2f9c..e053a6a 100644 --- a/engines/holtwinter.py +++ b/engines/holtwinter.py @@ -3,12 +3,23 @@ import pandas as pd from sklearn.metrics import mean_squared_error,mean_absolute_error from statsmodels.tsa.api import ExponentialSmoothing -from . helpers import create_train_test +from . helpers import create_train_test,seasonal_options import pickle from . BBDD import new_model, get_best_model from struct import * +def chunkIt(seq, num): + avg = len(seq) / float(num) + out = [] + last = 0.0 + + while last < len(seq): + out.append(len(seq[int(last):int(last + avg)])) + last += avg + + return out + def anomaly_holt(lista_datos,num_fut,desv_mse=0,name='NA'): lista_puntos = np.arange(0, len(lista_datos),1) @@ -68,37 +79,47 @@ def anomaly_holt(lista_datos,num_fut,desv_mse=0,name='NA'): best_period=0 best_trend='null' #list_trend=['add','mul','additive','multiplicative'] - list_trend=['add'] - for trend in list_trend: - for period in range(4,18): - print ('Periodo', period) - list_forecast_camb = [] - tam_train = int(len(df)*0.7) - df_test = df[tam_train:] - for i in range(0,len(df_test)): - print ('Prediccion punto ', i) - df_train_camb = df[:tam_train+i] - stepwise_model_camb = ExponentialSmoothing(df_train_camb['valores'],seasonal_periods=period ,trend=trend, seasonal='add', ) - fit_stepwise_model_camb = stepwise_model_camb.fit() - forecast_camb = fit_stepwise_model_camb.forecast(1) - - list_forecast_camb.append(forecast_camb.values[0]) - - mae_temp = mean_absolute_error(list_forecast_camb,df_test['valores'].values) - if mae_temp < mae_period: - best_period = period - best_trend = trend - print ('best_period',best_period) - print ('best_trend', best_trend) - print ('mae_temp', mae_temp) - mae_period = mae_temp - else: - print ('aa') + list_trend=['add','mul', 'additive', 'multiplicative'] #,'None'] + print ("pasa hasta aqui") + periods = seasonal_options(df.valores) + print (periods) + #for trend_val in list_trend: + for seasonal_val in list_trend: + for period in periods: + print ('Periodo', period) + list_forecast_camb = [] + tam_train = int(len(df)*0.7) + df_test = df[tam_train:] + part_lenghts = chunkIt(range(len(df_test)),3) + + for i in part_lenghts: + print ('Prediccion punto ', i) + df_train_camb = df[:tam_train+i] + stepwise_model_camb = ExponentialSmoothing(df_train_camb['valores'] , seasonal=seasonal_val ,seasonal_periods=period ).fit() + forecast_camb = stepwise_model_camb.forecast(i) + + list_forecast_camb.extend(forecast_camb.values[:i]) + + mae_temp = mean_absolute_error(list_forecast_camb,df_test['valores'].values) + if mae_temp < mae_period: + best_period = period + # best_trend = trend_val + best_seasonal = seasonal_val + print ('best_period',best_period) + # print ('best_trend', best_trend) + print ('mae_temp', mae_temp) + print ('best_seasonal', best_seasonal) + mae_period = mae_temp + else: + print ('aa') + + + print ("######best mae is " + str(mae_period) + " with the period " + str(best_period)+ " trend " + best_trend) - stepwise_model = ExponentialSmoothing(df_train['valores'],seasonal_periods=best_period ,trend=best_trend, seasonal='add', ) + stepwise_model = ExponentialSmoothing(df_train['valores'],seasonal_periods=best_period , seasonal=best_seasonal ) fit_stepwise_model = stepwise_model.fit() future_forecast_pred = fit_stepwise_model.forecast(len(df_test['valores'])) @@ -150,7 +171,7 @@ def anomaly_holt(lista_datos,num_fut,desv_mse=0,name='NA'): df_aler_ult['anomaly_score']= ( df_aler_ult['anomaly_score'] - min ) /(max - min) print ("Anomaly finished. Start forecasting") - stepwise_model1 = ExponentialSmoothing(df['valores'],seasonal_periods=best_period,trend=best_trend , seasonal='add') + stepwise_model1 = ExponentialSmoothing(df['valores'],seasonal_periods=best_period,seasonal=best_seasonal) print ("Pass the training") fit_stepwise_model1 = stepwise_model1.fit() diff --git a/engines/lstm.py b/engines/lstm.py index 4b0fabd..c764988 100644 --- a/engines/lstm.py +++ b/engines/lstm.py @@ -58,8 +58,7 @@ def add_hlayer(model, num_nodes, return_sequences=False): def define_model(n_nodes, n_hlayers, dropout, input_data, output_shape): model = Sequential() if n_hlayers == 1: - model.add(LSTM(output_dim =int(n_nodes), activation='relu', input_shape =(input_data.shape[1], input_data.shape[2]), - return_sequences=False)) + model.add(LSTM(units =int(n_nodes), activation='relu', input_shape =(input_data.shape[1], input_data.shape[2]),return_sequences=False)) else: #model.add(LSTM(output_dim =int(n_nodes), activation='relu', input_shape =(input_data.shape[1], input_data.shape[2]),return_sequences=True)) model.add(LSTM(activation='relu', input_shape =(input_data.shape[1], input_data.shape[2]),return_sequences=True,units =int(n_nodes) )) @@ -160,7 +159,7 @@ def anomaly_uni_LSTM(lista_datos,num_forecast=10,desv_mse=2,train='True',name='t ##################neural network###################### models_dict = {} - n_hlayers = [1, 2] + n_hlayers = [1, 2,3] n_nodes = [100, 300, 500] n_dropout = [0, 0.1, 0.15, 0.20] @@ -191,7 +190,7 @@ def anomaly_uni_LSTM(lista_datos,num_forecast=10,desv_mse=2,train='True',name='t gc.collect() model = define_model(nodes, hlayer, drop, win_train_x, num_forecast) model_name = 'model_nlayers_{}_nnodes_{}_dropout_{}'.format(hlayer, nodes, drop) - model.fit(win_train_x, win_train_y, epochs=65, verbose=0, shuffle=False) + model.fit(win_train_x, win_train_y, epochs=85, verbose=0, shuffle=False) #models_dict[name] = model print(model_name) @@ -214,6 +213,8 @@ def anomaly_uni_LSTM(lista_datos,num_forecast=10,desv_mse=2,train='True',name='t print ('rmse', rmse) print ('mae', mae) if mae < best_mae: + best_mae=mae + print ("LSTM best new model " + str(mae)+"\n") best_model=model @@ -400,7 +401,7 @@ def anomaly_uni_LSTM(lista_datos,num_forecast=10,desv_mse=2,train='True',name='t #print('reshape win_todo_x',win_todo_x.shape) - name_model = actual_model.fit(win_todo_x, win_todo_y, epochs=25, verbose=0, shuffle=False) + name_model = actual_model.fit(win_todo_x, win_todo_y, epochs=85, verbose=0, shuffle=False) @@ -564,7 +565,7 @@ def anomaly_LSTM(list_var,num_fut=10,desv_mae=2): #print 'fit model {}'.format(model) try: seed(69) - name_model = models_dict[model].fit(win_train_x, win_train_y_var_pred, epochs=25, verbose=0, shuffle=False) + name_model = models_dict[model].fit(win_train_x, win_train_y_var_pred, epochs=85, verbose=0, shuffle=False) dict_eval_models[model] = name_model except: dict_eval_models[model] = 'Error' @@ -701,7 +702,7 @@ def anomaly_LSTM(list_var,num_fut=10,desv_mae=2): #print ('win_todo_y_var_pred',win_todo_y_var_pred) #print ('shape win_todo_y_var_pred',win_todo_y_var_pred.shape) - name_model = models_dict[best_model].fit(win_todo_x, win_todo_y_var_pred, epochs=25, verbose=0, shuffle=False) + name_model = models_dict[best_model].fit(win_todo_x, win_todo_y_var_pred, epochs=85, verbose=0, shuffle=False) falta_win_todo_x = x[-num_forecast:,:] #print ('falta_win_todo_x',falta_win_todo_x)