Add Croston Method #97

Added some tests
antoinecarme · Oct 29, 2018 · f363763 · f363763
1 parent 5bf295f
commit f363763
Show file tree

Hide file tree

Showing 10 changed files with 224 additions and 0 deletions.
diff --git a/tests/croston/__init__.py b/tests/croston/__init__.py
diff --git a/tests/croston/croston_prototype_1.py b/tests/croston/croston_prototype_1.py
@@ -0,0 +1,65 @@
+# source : https://otexts.org/fpp2/counts.html
+
+import pandas as pd
+
+# %matplotlib inline
+
+lCounts = "0 2 0 1 0 11 0 0 0 0 2 0 6 3 0 0 0 0 0 7 0 0 0 0 0 0 0 3 1 0 0 1 0 1 0 0".split()
+lCounts = [float(c) for c in lCounts]
+N = len(lCounts)
+lDates = pd.date_range(start="2000-01-01", periods=N, freq='m')
+
+df = pd.DataFrame({"Date" : lDates, "Count" : lCounts})
+
+#  q is often called the “demand” and a the “inter-arrival time”.
+q = df[abs(df['Count']) > 0.0]['Count']
+demand_times = pd.Series(list(q.index)) + 1
+a = demand_times - demand_times.shift(1).fillna(0.0)
+df2 = pd.DataFrame({'demand_time' : list(demand_times), 'q' : list(q) , 'a' : list(a) })
+df2
+
+def get_coeff(alpha , croston_type):
+    if(croston_type == "sba"):
+        return 1.0-(alpha/2.0)
+    elif(croston_type == "sbj"):
+        return (1.0 - alpha/(2.0-alpha))
+    # default 
+    return 1.0
+
+
+
+# q  and a forecast
+alpha = 0.1
+
+df2['q_est'] = None
+df2['a_est'] = None
+
+df2.loc[0 , 'q_est'] = df2['q'][0]
+df2.loc[0,  'a_est'] = df2['a'][0]
+for i in range(df2.shape[0] - 1):
+    q1 = (1.0 - alpha) * df2['q_est'][ i ] + alpha * df2['q'][ i ]
+    a1 = (1.0 - alpha) * df2['a_est'][ i ] + alpha * df2['a'][ i ]
+    df2.loc[i + 1, 'q_est'] = q1
+    df2.loc[i + 1, 'a_est'] = a1
+
+coeff = get_coeff(alpha , "default")
+df2['forecast'] = coeff * df2['q_est'] / df2['a_est']
+df2
+
+
+
+forecast_11 = df2['q_est'][10] / df2['a_est'][10]
+forecast_11
+
+
+df2['index'] = df2['demand_time'] - 1
+
+
+df1 = df.reset_index()
+df3 = df1.merge(df2 , how='left', on=('index' , 'index'))
+
+df4 = df3.fillna(method='ffill')
+
+print(df4)
+
+# df4.plot('Date', ['Count' , 'forecast'])
diff --git a/tests/croston/croston_prototype_2.py b/tests/croston/croston_prototype_2.py
@@ -0,0 +1,76 @@
+# r example
+#  y <- rpois(20,lambda=.3)
+#  fcast <- croston(y)
+
+#> y
+#  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
+
+
+# r result : 
+# > fcast
+#   Point Forecast
+# 21       0.180018
+
+# > fcast$fitted
+# Time Series:
+# Start = 1 
+# End = 20 
+# Frequency = 1 
+#  [1]        NA 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1666667
+#  [8] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
+# [15] 0.1666667 0.1666667 0.1666667 0.1538462 0.1680672 0.1680672
+
+
+import pandas as pd
+
+lCounts = "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0".split()
+lCounts = [float(c) for c in lCounts]
+N = len(lCounts)
+lDates = pd.date_range(start="2000-01-01", periods=N, freq='m')
+
+df = pd.DataFrame({"Date" : lDates, "Count" : lCounts})
+
+#  q is often called the “demand” and a the “inter-arrival time”.
+q = df[abs(df['Count']) > 0.0]['Count']
+demand_times = pd.Series(list(q.index)) + 1
+a = demand_times - demand_times.shift(1).fillna(0.0)
+df2 = pd.DataFrame({'demand_time' : list(demand_times), 'q' : list(q) , 'a' : list(a) })
+df2
+
+def get_coeff(alpha , croston_type):
+    if(croston_type == "sba"):
+        return 1.0-(alpha/2.0)
+    elif(croston_type == "sbj"):
+        return (1.0 - alpha/(2.0-alpha))
+    # default 
+    return 1.0
+
+# q  and a forecast
+alpha = 0.1
+
+df2['q_est'] = None
+df2['a_est'] = None
+
+df2.loc[0 , 'q_est'] = df2['q'][0]
+df2.loc[0,  'a_est'] = df2['a'][0]
+for i in range(df2.shape[0] - 1):
+    q1 = (1.0 - alpha) * df2['q_est'][ i ] + alpha * df2['q'][ i ]
+    a1 = (1.0 - alpha) * df2['a_est'][ i ] + alpha * df2['a'][ i ]
+    df2.loc[i + 1, 'q_est'] = q1
+    df2.loc[i + 1, 'a_est'] = a1
+
+
+df2['forecast'] = get_coeff(alpha , "default") * df2['q_est'] / df2['a_est']
+df2
+
+forecast_11 = df2['q_est'][df2.shape[0] - 1] / df2['a_est'][df2.shape[0] - 1]
+forecast_11
+
+df2['index'] = df2['demand_time'] - 1
+
+df1 = df.reset_index()
+df3 = df1.merge(df2 , how='left', on=('index' , 'index'))
+
+df4 = df3.fillna(method='ffill')
+
+print(df4)
diff --git a/tests/croston/croston_test_1_None.py b/tests/croston/croston_test_1_None.py
@@ -0,0 +1,3 @@
+import pyaf.tests.croston.croston_tests as crost
+
+crost.create_model(N = 365 , croston_type = "SBJ")
diff --git a/tests/croston/croston_test_1_SBA.py b/tests/croston/croston_test_1_SBA.py
@@ -0,0 +1,3 @@
+import pyaf.tests.croston.croston_tests as crost
+
+crost.create_model(N = 365 , croston_type = "SBA")
diff --git a/tests/croston/croston_test_1_SBA_linear_trend.py b/tests/croston/croston_test_1_SBA_linear_trend.py
@@ -0,0 +1,3 @@
+import pyaf.tests.croston.croston_tests as crost
+
+crost.create_model(N = 365 , croston_type = "SBA", iTrend = True)
diff --git a/tests/croston/croston_test_1_SBJ.py b/tests/croston/croston_test_1_SBJ.py
@@ -0,0 +1,3 @@
+import pyaf.tests.croston.croston_tests as crost
+
+crost.create_model(N = 365 , croston_type = "SBJ")
diff --git a/tests/croston/croston_test_1_SBJ_linear_trend.py b/tests/croston/croston_test_1_SBJ_linear_trend.py
@@ -0,0 +1,3 @@
+import pyaf.tests.croston.croston_tests as crost
+
+crost.create_model(N = 365 , croston_type = "SBJ", iTrend = True)
diff --git a/tests/croston/croston_test_1_legacy_linear_trend.py b/tests/croston/croston_test_1_legacy_linear_trend.py
@@ -0,0 +1,3 @@
+import pyaf.tests.croston.croston_tests as crost
+
+crost.create_model(N = 365 , croston_type = None, iTrend = True)
diff --git a/tests/croston/croston_tests.py b/tests/croston/croston_tests.py
@@ -0,0 +1,65 @@
+import numpy as np
+import pandas as pd
+
+def create_intermittent_signal(N):
+    sig = np.zeros(N)
+    for i in range(0, N // 30):
+        if(np.random.random() < 0.5):
+            sig[i * 30] = np.random.randint(100)
+    return sig
+
+
+
+# create an intemittent signal with a linear trend
+def create_intermittent_signal_linear_trend(N):
+    sig = [k/ N for k in range(N)]
+    for i in range(0, N // 30):
+        if(np.random.random() < 0.5):
+            sig[i * 30] = sig[i * 30] + 0.5 * np.random.random()
+    return sig
+
+def create_model(N = 365 , croston_type=None, iTrend = False):
+    # N = 365
+    np.random.seed(seed=1960)
+    signal = None
+    if(iTrend):
+        signal = create_intermittent_signal_linear_trend(N)
+    else:
+        signal = create_intermittent_signal(N)
+
+    df_train = pd.DataFrame({"Date" : pd.date_range(start="2016-01-25", periods=N, freq='D'),
+                             "Signal" : signal})
+    # print(df_train.head(N))
+
+    import pyaf.ForecastEngine as autof
+    lEngine = autof.cForecastEngine()
+
+    lEngine.mOptions.set_active_trends(['None', 'LinearTrend'])
+    lEngine.mOptions.set_active_periodics(['None'])
+    lEngine.mOptions.set_active_transformations(['None'])
+    lEngine.mOptions.set_active_autoregressions(['CROSTON'])
+    lEngine.mOptions.mModelSelection_Criterion = "L2";
+    lEngine.mOptions.mCrostonOptions.mMethod = croston_type
+    lEngine.mOptions.mCrostonOptions.mZeroRate = 0.0
+
+    # get the best time series model for predicting one week
+    lEngine.train(iInputDS = df_train, iTime = 'Date', iSignal = 'Signal', iHorizon = 7);
+
+    lEngine.getModelInfo()
+
+    lName = "outputs/croston_" + str(croston_type) + "_"
+    lName = lName + ("linear_trend" if iTrend else "no_trend" )
+    lEngine.standardPlots(lName);
+
+    # predict one week
+    df_forecast = lEngine.forecast(iInputDS = df_train, iHorizon = 7)
+    # list the columns of the forecast dataset
+    print(df_forecast.columns) #
+
+    cols = ['Date', 'Signal', '_Signal',
+            '_Signal_TransformedForecast', 'Signal_Forecast']
+    # print the real forecasts
+    print(df_forecast[cols].tail(12))
+
+    print(df_forecast['Signal'].describe())
+    print(df_forecast['Signal_Forecast'].describe())