Skip to content

Commit

Permalink
Merge pull request #148 from antoinecarme/missing_data
Browse files Browse the repository at this point in the history
Missing data
  • Loading branch information
antoinecarme authored Jul 30, 2020
2 parents dad5915 + c8601eb commit 6b652e9
Show file tree
Hide file tree
Showing 434 changed files with 51,624 additions and 32,148 deletions.
21 changes: 21 additions & 0 deletions pyaf/TS/MissingData.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,27 @@ def apply_signal_imputation_method(self, iInputDS, iSignal):
elif(self.mOptions.mMissingDataOptions.mSignalMissingDataImputation == "Interpolate"):
lSignal = self.interpolate_signal_if_needed(iInputDS , iSignal)
iInputDS[iSignal] = lSignal

elif(self.mOptions.mMissingDataOptions.mSignalMissingDataImputation == "Constant"):
lSignal = iInputDS[iSignal].fillna(self.mOptions.mMissingDataOptions.mConstant, method=None)
iInputDS[iSignal] = lSignal

elif(self.mOptions.mMissingDataOptions.mSignalMissingDataImputation == "Mean"):
lMean = iInputDS[iSignal].mean()
lSignal = iInputDS[iSignal].fillna(lMean, method=None)
iInputDS[iSignal] = lSignal

elif(self.mOptions.mMissingDataOptions.mSignalMissingDataImputation == "Median"):
lMedian = iInputDS[iSignal].median()
lSignal = iInputDS[iSignal].fillna(lMedian, method=None)
iInputDS[iSignal] = lSignal

elif(self.mOptions.mMissingDataOptions.mSignalMissingDataImputation == "PreviousValue"):
lSignal = iInputDS[iSignal].fillna(method='ffill')
# replace the first empty values with the first known value
lSignal = lSignal.fillna(method='bfill')
iInputDS[iSignal] = lSignal

return iInputDS

def interpolate_time_if_needed(self, iInputDS , iTime):
Expand Down
3 changes: 2 additions & 1 deletion pyaf/TS/Options.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,9 @@ def __init__(self):
class cMissingDataOptions:
    """Options describing how missing values are handled.

    One strategy is configured for the signal column and one for the time
    column; ``None`` leaves the corresponding strategy unset.
    """

    def __init__(self):
        # Strategy for missing signal values, one of:
        # [None , "DiscardRow", "Interpolate", "Mean", "Median" , "Constant" , "PreviousValue"]
        self.mSignalMissingDataImputation = None
        # Strategy for missing time values, one of:
        # [None , "DiscardRow", "Interpolate"]
        self.mTimeMissingDataImputation = None
        # Replacement value used by the "Constant" signal strategy.
        self.mConstant = 0.0

class cSignalDecomposition_Options(cModelControl):

Expand Down
5 changes: 4 additions & 1 deletion pyaf/TS/SignalDecomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,10 @@ def standardPlots(self, name = None, format = 'png'):
start_time = time.time()
logger.info("START_PLOTTING")
for lSignal in self.mSignals:
self.mBestModels[lSignal].standardPlots(name + "_" + lSignal, format);
lName = name
if(name is not None):
lName = str(name) + "_" + str(lSignal)
self.mBestModels[lSignal].standardPlots(lName, format);
lPlotTime = time.time() - start_time;
logger.info("END_PLOTTING_TIME_IN_SECONDS " + str(lPlotTime))

Expand Down
554 changes: 547 additions & 7 deletions tests/Makefile

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions tests/bugs/issue_55/grouping_issue_55_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
French_Wine_Export_in_Euros_DF.info()


lInfo = lEngine.to_json()
lInfo = lEngine.to_dict()
print(lInfo.keys())

print(lInfo['Structure'])
Expand All @@ -133,7 +133,7 @@

lEngine.mSignalHierarchy.plot()

CN_Engine = lEngine.mSignalHierarchy.mModels[2]['__CN'] # __CN is at hierarchical level 2
CN_Engine = lEngine.mSignalHierarchy.mModels # __CN is at hierarchical level 2

CN_Engine.getModelInfo()

Expand Down
4 changes: 2 additions & 2 deletions tests/gen_makefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def add_makefile_entry(subdir1):
return test_target;


str1 = "artificial basic_checks bugs cross_validation croston exog expsmooth func HeartRateTimeSeries heroku hierarchical HourOfWeek model_control perf svr transformations neuralnet real-life time_res perfs demos xgb xeon-phi-parallel sampling temporal_hierarchy WeekOfMonth";
str1 = "artificial basic_checks bugs cross_validation croston exog expsmooth func HeartRateTimeSeries heroku hierarchical HourOfWeek model_control perf svr transformations neuralnet real-life time_res perfs demos xgb xeon-phi-parallel sampling temporal_hierarchy WeekOfMonth missing_data";
subdirs = str1.split();

print("PYTHON=python3\n\n");
Expand All @@ -45,6 +45,6 @@ def add_makefile_entry(subdir1):

print("all: " , str1 , "\n\t\n");

str2 = "demos basic_checks cross_validation croston exog heroku hierarchical model_control perfs svr transformations func real-life time_res xgb sampling HourOfWeek WeekOfMonth";
str2 = "demos basic_checks cross_validation croston exog heroku hierarchical model_control perfs svr transformations func real-life time_res xgb sampling HourOfWeek WeekOfMonth missing_data";

print("build-test : " , str2 , "\n\t\n");
16 changes: 16 additions & 0 deletions tests/missing_data/gen_air_passengers_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@





def gen_all_air_passengers(iDir="tests/missing_data"):
    """Generate one air-passengers test script per imputation combination.

    A script is written for every (time strategy, signal strategy) pair.
    Each script imports the generic air-passengers test module and calls
    ``test_air_passengers_missing_data`` with that pair. Files that already
    exist under the same name are overwritten.

    iDir : directory receiving the scripts. It is not created here, so it
           must already exist. The default is the location that used to be
           hard-coded.
    """
    lTimeImputations = [None, "DiscardRow", "Interpolate"]
    lSignalImputations = [None, "DiscardRow", "Interpolate", "Mean", "Median", "Constant", "PreviousValue"]
    for iTimeImp in lTimeImputations:
        for iSigImp in lSignalImputations:
            filename = iDir + "/test_missing_data_air_passengers_" + str(iTimeImp) + "_" + str(iSigImp) + ".py"
            with open(filename, "w") as outfile:
                print("WRITING_FILE", filename)
                outfile.write("import tests.missing_data.test_missing_data_air_passengers_generic as gen\n\n")
                # repr() emits a bare None and quoted strategy names, i.e.
                # valid Python source for the call arguments.
                outfile.write("gen.test_air_passengers_missing_data({!r}, {!r})\n".format(iTimeImp, iSigImp))

# gen_all_air_passengers()
16 changes: 16 additions & 0 deletions tests/missing_data/gen_ozone_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@





def gen_all_ozone(iDir="tests/missing_data"):
    """Generate one ozone test script per imputation combination.

    A script is written for every (time strategy, signal strategy) pair.
    Each script imports the generic ozone test module and calls
    ``test_ozone_missing_data`` with that pair. Files that already exist
    under the same name are overwritten.

    iDir : directory receiving the scripts. It is not created here, so it
           must already exist. The default is the location that used to be
           hard-coded.
    """
    lTimeImputations = [None, "DiscardRow", "Interpolate"]
    lSignalImputations = [None, "DiscardRow", "Interpolate", "Mean", "Median", "Constant", "PreviousValue"]
    for iTimeImp in lTimeImputations:
        for iSigImp in lSignalImputations:
            filename = iDir + "/test_missing_data_ozone_" + str(iTimeImp) + "_" + str(iSigImp) + ".py"
            with open(filename, "w") as outfile:
                print("WRITING_FILE", filename)
                outfile.write("import tests.missing_data.test_missing_data_ozone_generic as gen\n\n")
                # repr() emits a bare None and quoted strategy names, i.e.
                # valid Python source for the call arguments.
                outfile.write("gen.test_ozone_missing_data({!r}, {!r})\n".format(iTimeImp, iSigImp))

# gen_all_ozone()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('DiscardRow', 'Constant')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('DiscardRow', 'DiscardRow')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('DiscardRow', 'Interpolate')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('DiscardRow', 'Mean')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('DiscardRow', 'Median')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('DiscardRow', None)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('DiscardRow', 'PreviousValue')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('Interpolate', 'Constant')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('Interpolate', 'DiscardRow')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('Interpolate', 'Interpolate')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('Interpolate', 'Mean')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('Interpolate', 'Median')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('Interpolate', None)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data('Interpolate', 'PreviousValue')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data(None, 'Constant')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data(None, 'DiscardRow')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data(None, 'Interpolate')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data(None, 'Mean')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data(None, 'Median')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data(None, None)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_air_passengers_generic as gen

gen.test_air_passengers_missing_data(None, 'PreviousValue')
54 changes: 54 additions & 0 deletions tests/missing_data/test_missing_data_air_passengers_generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@

import pandas as pd
import numpy as np


import pyaf.ForecastEngine as autof
import pyaf.Bench.TS_datasets as tsds

def add_some_missing_data_in_signal(df, col, rate=0.2, random_state=1960):
    """Blank out a reproducible random subset of the signal column.

    df           : DataFrame to modify; it is changed IN PLACE.
    col          : name of the signal column receiving the NaN values.
    rate         : fraction of rows to blank (default 0.2).
    random_state : seed passed to ``DataFrame.sample`` so that the same rows
                   are selected on every run (default 1960).

    Returns the same ``df`` object, for call chaining.
    """
    # Sample row labels rather than positions so .loc addresses the right rows
    # whatever the index looks like.
    lMissingIndex = df.sample(frac=rate, random_state=random_state).index
    df.loc[lMissingIndex, col] = np.nan
    return df

def add_some_missing_data_in_time(df, col, rate=0.2, random_state=1960):
    """Blank out a reproducible random subset of the time column.

    df           : DataFrame to modify; it is changed IN PLACE.
    col          : name of the time column receiving the NaN values.
    rate         : fraction of rows to blank (default 0.2).
    random_state : seed passed to ``DataFrame.sample`` so that the same rows
                   are selected on every run (default 1960).

    Returns the same ``df`` object, for call chaining.
    """
    # Sample row labels rather than positions so .loc addresses the right rows
    # whatever the index looks like.
    lMissingIndex = df.sample(frac=rate, random_state=random_state).index
    df.loc[lMissingIndex, col] = np.nan
    return df


def test_air_passengers_missing_data(iTimeMissingDataImputation, iSignalMissingDataImputation):
    """Train and forecast the airline-passengers dataset with missing data.

    NaN values are injected into the time column when a time imputation
    strategy is given, and into the signal column when a signal imputation
    strategy is given. The engine is then trained with those strategies and
    its model description and forecasts are printed.

    iTimeMissingDataImputation   : strategy stored in
        ``mMissingDataOptions.mTimeMissingDataImputation`` (or None).
    iSignalMissingDataImputation : strategy stored in
        ``mMissingDataOptions.mSignalMissingDataImputation`` (or None).
    """
    b1 = tsds.load_airline_passengers()
    df = b1.mPastData

    # NOTE: both helpers default to the same sampling fraction and seed, so
    # when both strategies are set the same rows lose their time and signal.
    if iTimeMissingDataImputation is not None:
        df = add_some_missing_data_in_time(df, b1.mTimeVar)
    if iSignalMissingDataImputation is not None:
        df = add_some_missing_data_in_signal(df, b1.mSignalVar)

    lEngine = autof.cForecastEngine()
    H = b1.mHorizon
    lEngine.mOptions.mMissingDataOptions.mTimeMissingDataImputation = iTimeMissingDataImputation
    lEngine.mOptions.mMissingDataOptions.mSignalMissingDataImputation = iSignalMissingDataImputation
    lEngine.train(df, b1.mTimeVar, b1.mSignalVar, H)
    lEngine.getModelInfo()
    print(lEngine.mSignalDecomposition.mTrPerfDetails.head())

    # Forecast from a copy so the training frame is not handed to the engine twice.
    dfapp_in = df.copy()
    dfapp_out = lEngine.forecast(dfapp_in, H)
    print("Forecast Columns " , dfapp_out.columns)
    Forecast_DF = dfapp_out[[b1.mTimeVar , b1.mSignalVar, b1.mSignalVar + '_Forecast']]
    print(Forecast_DF.info())
    print("Forecasts\n" , Forecast_DF.tail(H))

    # Tagged sections printed on stdout: model description, then forecasts.
    print("\n\n<ModelInfo>")
    print(lEngine.to_json())
    print("</ModelInfo>\n\n")
    print("\n\n<Forecast>")
    print(Forecast_DF.tail(2*H).to_json(date_format='iso'))
    print("</Forecast>\n\n")

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('DiscardRow', 'Constant')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('DiscardRow', 'DiscardRow')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('DiscardRow', 'Interpolate')
3 changes: 3 additions & 0 deletions tests/missing_data/test_missing_data_ozone_DiscardRow_Mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('DiscardRow', 'Mean')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('DiscardRow', 'Median')
3 changes: 3 additions & 0 deletions tests/missing_data/test_missing_data_ozone_DiscardRow_None.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('DiscardRow', None)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('DiscardRow', 'PreviousValue')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('Interpolate', 'Constant')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('Interpolate', 'DiscardRow')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('Interpolate', 'Interpolate')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('Interpolate', 'Mean')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('Interpolate', 'Median')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('Interpolate', None)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data('Interpolate', 'PreviousValue')
3 changes: 3 additions & 0 deletions tests/missing_data/test_missing_data_ozone_None_Constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data(None, 'Constant')
3 changes: 3 additions & 0 deletions tests/missing_data/test_missing_data_ozone_None_DiscardRow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data(None, 'DiscardRow')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data(None, 'Interpolate')
3 changes: 3 additions & 0 deletions tests/missing_data/test_missing_data_ozone_None_Mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data(None, 'Mean')
3 changes: 3 additions & 0 deletions tests/missing_data/test_missing_data_ozone_None_Median.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data(None, 'Median')
3 changes: 3 additions & 0 deletions tests/missing_data/test_missing_data_ozone_None_None.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data(None, None)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import tests.missing_data.test_missing_data_ozone_generic as gen

gen.test_ozone_missing_data(None, 'PreviousValue')
54 changes: 54 additions & 0 deletions tests/missing_data/test_missing_data_ozone_generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@

import pandas as pd
import numpy as np


import pyaf.ForecastEngine as autof
import pyaf.Bench.TS_datasets as tsds

def add_some_missing_data_in_signal(df, col, rate=0.2, random_state=1960):
    """Blank out a reproducible random subset of the signal column.

    df           : DataFrame to modify; it is changed IN PLACE.
    col          : name of the signal column receiving the NaN values.
    rate         : fraction of rows to blank (default 0.2).
    random_state : seed passed to ``DataFrame.sample`` so that the same rows
                   are selected on every run (default 1960).

    Returns the same ``df`` object, for call chaining.
    """
    # Sample row labels rather than positions so .loc addresses the right rows
    # whatever the index looks like.
    lMissingIndex = df.sample(frac=rate, random_state=random_state).index
    df.loc[lMissingIndex, col] = np.nan
    return df

def add_some_missing_data_in_time(df, col, rate=0.2, random_state=1960):
    """Blank out a reproducible random subset of the time column.

    df           : DataFrame to modify; it is changed IN PLACE.
    col          : name of the time column receiving the NaN values.
    rate         : fraction of rows to blank (default 0.2).
    random_state : seed passed to ``DataFrame.sample`` so that the same rows
                   are selected on every run (default 1960).

    Returns the same ``df`` object, for call chaining.
    """
    # Sample row labels rather than positions so .loc addresses the right rows
    # whatever the index looks like.
    lMissingIndex = df.sample(frac=rate, random_state=random_state).index
    df.loc[lMissingIndex, col] = np.nan
    return df


def test_ozone_missing_data(iTimeMissingDataImputation, iSignalMissingDataImputation):
    """Train and forecast the ozone dataset with missing data.

    NaN values are injected into the time column when a time imputation
    strategy is given, and into the signal column when a signal imputation
    strategy is given. The engine is then trained with those strategies and
    its model description and forecasts are printed.

    iTimeMissingDataImputation   : strategy stored in
        ``mMissingDataOptions.mTimeMissingDataImputation`` (or None).
    iSignalMissingDataImputation : strategy stored in
        ``mMissingDataOptions.mSignalMissingDataImputation`` (or None).
    """
    b1 = tsds.load_ozone()
    df = b1.mPastData

    # NOTE: both helpers default to the same sampling fraction and seed, so
    # when both strategies are set the same rows lose their time and signal.
    if iTimeMissingDataImputation is not None:
        df = add_some_missing_data_in_time(df, b1.mTimeVar)
    if iSignalMissingDataImputation is not None:
        df = add_some_missing_data_in_signal(df, b1.mSignalVar)

    lEngine = autof.cForecastEngine()
    H = b1.mHorizon
    lEngine.mOptions.mMissingDataOptions.mTimeMissingDataImputation = iTimeMissingDataImputation
    lEngine.mOptions.mMissingDataOptions.mSignalMissingDataImputation = iSignalMissingDataImputation
    lEngine.train(df, b1.mTimeVar, b1.mSignalVar, H)
    lEngine.getModelInfo()
    print(lEngine.mSignalDecomposition.mTrPerfDetails.head())

    # Forecast from a copy so the training frame is not handed to the engine twice.
    dfapp_in = df.copy()
    dfapp_out = lEngine.forecast(dfapp_in, H)
    print("Forecast Columns " , dfapp_out.columns)
    Forecast_DF = dfapp_out[[b1.mTimeVar , b1.mSignalVar, b1.mSignalVar + '_Forecast']]
    print(Forecast_DF.info())
    print("Forecasts\n" , Forecast_DF.tail(H))

    # Tagged sections printed on stdout: model description, then forecasts.
    print("\n\n<ModelInfo>")
    print(lEngine.to_json())
    print("</ModelInfo>\n\n")
    print("\n\n<Forecast>")
    print(Forecast_DF.tail(2*H).to_json(date_format='iso'))
    print("</Forecast>\n\n")

Loading

0 comments on commit 6b652e9

Please sign in to comment.