Forecasting at Scale by Sean Taylor and Benjamin Letham
https://facebook.github.io/prophet/docs/installation.html#python
pip3 install fbprophet
- The input to Prophet is always a dataframe with two columns: ds and y.
- The ds (datestamp) column should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp.
- The y column must be numeric, and represents the measurement we wish to forecast.
# Load the raw sales series; the CSV holds one date column and one value column
df = pd.read_csv(filepath_or_buffer='../UPDATE-TSA-NOTEBOOKS/Data/BeerWineLiquor.csv')
date | beer | |
---|---|---|
0 | 1/1/1992 | 1509 |
1 | 2/1/1992 | 1541 |
2 | 3/1/1992 | 1597 |
3 | 4/1/1992 | 1675 |
4 | 5/1/1992 | 1822 |
# Prophet looks its input columns up by name, so relabel them (positionally)
# as 'ds' for the datestamp and 'y' for the value to forecast
df.columns = ['ds', 'y']
# Parse the date strings so that 'ds' holds pandas datetime values
df = df.assign(ds=pd.to_datetime(df['ds']))
ds | y | |
---|---|---|
0 | 1992-01-01 00:00:00 | 1509 |
1 | 1992-02-01 00:00:00 | 1541 |
2 | 1992-03-01 00:00:00 | 1597 |
3 | 1992-04-01 00:00:00 | 1675 |
4 | 1992-05-01 00:00:00 | 1822 |
- By default Prophet is going to expect daily data
# Hold out the last 12 monthly observations as the test set.
# Slicing relative to the end of the frame keeps the hold-out aligned with the
# 12-step forecast and the last-12 RMSE comparison, whatever the actual row
# count is. A hard-coded split at row 576 only works for a frame of exactly
# 588 rows; on a shorter frame it leaves `test` empty.
print(len(df)) # to know how many rows we have
test_size = 12  # number of trailing rows (months) reserved for evaluation
train = df.iloc[:-test_size]
test = df.iloc[-test_size:]
from fbprophet import Prophet

# seasonality_mode can be 'additive' or 'multiplicative'; compare the RMSE of
# both to find out which one is better for your model
m = Prophet(seasonality_mode='additive')
m.fit(train)

# Extend the training dates by 12 month-start ('MS') steps. Passing freq here
# is used instead of setting df.index.freq = 'MS' as we do in statsmodels.
future = m.make_future_dataframe(freq='MS', periods=12)
preds = m.predict(future)

preds.head() # to find out more about the output

# yhat, yhat_lower and yhat_upper are probably the most important columns of
# this DataFrame
preds.loc[:, ['ds', 'yhat_lower', 'yhat_upper', 'yhat']].tail(24)
# Draw the forecast, then overlay the held-out observations on the same axes,
# restricting the x-axis to the 2018-01-01 .. 2019-01-01 window
ax = preds.plot(
    x='ds',
    y='yhat',
    label='Predictions',
    legend=True,
    figsize=(12, 6),
)
test.plot(
    x='ds',
    y='y',
    label='Test Data',
    legend=True,
    ax=ax,
    xlim=('2018-01-01', '2019-01-01'),
);

# Prophet's own forecast plot
m.plot(preds);

# Trend and seasonality components
m.plot_components(preds);
from statsmodels.tools.eval_measures import rmse
# Alternative:
# from sklearn.metrics import mean_squared_error

# Score the last 12 forecast rows against the held-out observations
RMSE = rmse(test['y'], preds['yhat'].iloc[-12:])
RMSE # it should be interesting to compare this value with test['y'].mean()
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import plot_cross_validation_metric

# Initial training period.
# It should be long enough to capture all of the components of the model, in
# particular seasonalities and extra regressors: at least a year for yearly
# seasonality, at least a week for weekly seasonality, etc.
initial = '{} days'.format(5 * 365)

# Period length used to step through the cross validation (it determines how
# many folds are performed).
period = '{} days'.format(5 * 365)

# Horizon of prediction for each fold: how far out do you want to forecast
# for each period?
horizon = '{} days'.format(365)

df_cv = cross_validation(m, initial=initial, period=period, horizon=horizon)
performance_metrics(df_cv)
plot_cross_validation_metric(df_cv, metric='rmse');
from fbprophet import Prophet

# Fit a model with default settings on the complete dataset (no hold-out)
m = Prophet()
m.fit(df)

# Passing freq='MS' is used instead of setting df.index.freq = 'MS'
future = m.make_future_dataframe(freq='MS', periods=12)
forecast = m.predict(future)

# Forecast plot followed by the component plots
m.plot(forecast);
m.plot_components(forecast);
# 1. Loading libraries
import pandas as pd
import matplotlib.pyplot as plt
from fbprophet import Prophet

# 2. Reading the dataset and shaping it into Prophet's ds / y layout
df = pd.read_csv('../UPDATE-TSA-NOTEBOOKS/Data/HospitalityEmployees.csv')
df.columns = ['ds', 'y']
df['ds'] = pd.to_datetime(df['ds'])

# 3. Fitting the model and building 12 month-start ('MS') future dates
m = Prophet()
m.fit(df)
future = m.make_future_dataframe(freq='MS', periods=12)

# 4. Predictions / forecasting
forecast = m.predict(future)

# 5. Main changes in the trend line
# Mark on the forecast plot the major points where the trend line changed
from fbprophet.plot import add_changepoints_to_plot
fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast);
Which kind of model will give us better results: additive or multiplicative?
If you are not sure which one suits your dataset, split it into train and test sets and compare the RMSE of the additive and the multiplicative model.
# 1. Loading libraries
import pandas as pd
import matplotlib.pyplot as plt
from fbprophet import Prophet

# 2. Reading the dataset and shaping it into Prophet's ds / y layout
df = pd.read_csv('../UPDATE-TSA-NOTEBOOKS/Data/airline_passengers.csv')
df.columns = ['ds', 'y']
df['ds'] = pd.to_datetime(df['ds'])

# 3. Model: additive seasonality, forecasting 50 month-start ('MS') steps
m = Prophet(seasonality_mode='additive')
m.fit(df)
future = m.make_future_dataframe(freq='MS', periods=50)
forecast = m.predict(future)

# 4. Plot: forecast, then its components
fig = m.plot(forecast);
fig = m.plot_components(forecast);

# Forecast plot again, this time with the trend changepoints marked
from fbprophet.plot import add_changepoints_to_plot
fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast);
# 1. Loading libraries
import pandas as pd
import matplotlib.pyplot as plt
from fbprophet import Prophet

# 2. Reading the dataset and shaping it into Prophet's ds / y layout
df = pd.read_csv('../UPDATE-TSA-NOTEBOOKS/Data/airline_passengers.csv')
df.columns = ['ds', 'y']
df['ds'] = pd.to_datetime(df['ds'])

# 3. Model: multiplicative seasonality, forecasting 50 month-start ('MS') steps
m = Prophet(seasonality_mode='multiplicative')
m.fit(df)
future = m.make_future_dataframe(freq='MS', periods=50)
forecast = m.predict(future)

# 4. Plot: forecast, then its components
fig = m.plot(forecast);
fig = m.plot_components(forecast);

# Forecast plot again, this time with the trend changepoints marked
from fbprophet.plot import add_changepoints_to_plot
fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast);