Attribution Logic Update with Fixed Organic (#82)
* docstring update

* plots update

* plots update

* model minor update

* attribution gamma update

* attribution gamma update

* fixing minor target maximizer unit test

* flake8 formatting

* minor update on the attributor gamma setting

* formatting

* fix a few utils for net profit optimization

* formatting

* Update __init__.py
edwinnglabs authored Dec 12, 2023
1 parent f5af233 commit 31e09de
Showing 28 changed files with 5,699 additions and 2,923 deletions.
937 changes: 256 additions & 681 deletions docs/examples/adstock.ipynb

Large diffs are not rendered by default.

129 changes: 84 additions & 45 deletions docs/examples/attribution.ipynb

Large diffs are not rendered by default.

580 changes: 446 additions & 134 deletions docs/examples/net_returns_max.ipynb

Large diffs are not rendered by default.

930 changes: 499 additions & 431 deletions docs/examples/quickstart.ipynb

Large diffs are not rendered by default.

964 changes: 385 additions & 579 deletions docs/examples/target_max.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion karpiu/__init__.py
@@ -1,2 +1,2 @@
name = "karpiu"
__version__ = "0.0.1"
__version__ = "0.0.2-alpha"
102 changes: 94 additions & 8 deletions karpiu/diagnostic.py
@@ -1,20 +1,29 @@
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from scipy import stats
import arviz as az
import logging
from copy import deepcopy

from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.stattools import durbin_watson

from tqdm.auto import tqdm

from typing import Tuple

from .models import MMM
from karpiu.explainability import AttributorGamma
from karpiu.model_shell import MMMShell


""" Diagnostic tools for MMM model object
"""


def check_residuals(model):
# TODO:
# assert model instance is the one stage model

def check_residuals(model: MMM):
max_adstock = model.get_max_adstock()
df = model.raw_df.copy()
pred = model.predict(df, decompose=False)
@@ -44,14 +53,12 @@ def check_residuals(model)
fig.tight_layout()


def check_stationarity(model):
def check_stationarity(model: MMM):
# 1. Run the [Augmented Dickey-Fuller test](https://en.wikipedia.org/wiki/Augmented_Dickey%E2%80%93Fuller_test);
# it needs to reject the null hypothesis, which means a unit root is not present.
# 2. Check the [Durbin-Watson statistic](https://en.wikipedia.org/wiki/Durbin%E2%80%93Watson_statistic);
# the closer to `2`, the better.

# TODO:
# assert model instance is the one stage model
max_adstock = model.get_max_adstock()
df = model.raw_df.copy()
pred = model.predict(df, decompose=False)
@@ -71,7 +78,7 @@ def check_stationarity(model)
print("Durbin-Watson Stat: {:.3f} Recommended Values:(|x - 2|>=1.0".format(dw_stat))


def check_convergence(model):
def check_convergence(model: MMM):
posteriors = model._model.get_posterior_samples(relabel=True, permute=False)
spend_cols = model.get_spend_cols()

@@ -82,3 +89,82 @@ def check_convergence(model)
chain_prop={"color": ["r", "b", "g", "y"]},
# figsize=(len(spend_cols), 30),
)


def two_steps_optim_check(
model: MMM,
budget_start: str,
n_iters: int = 10,
adstock_off: bool = True,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
model = deepcopy(model)
channels = model.get_spend_cols()
n_channels = len(channels)
date_col = model.date_col
budget_start = pd.to_datetime(budget_start)
budget_end = budget_start + pd.DateOffset(days=1)

raw_df = model.get_raw_df()
init_weight = np.mean(
raw_df.loc[
(raw_df[date_col] >= budget_start) & (raw_df[date_col] <= budget_end),
channels,
].values,
axis=0,
)

# arbitrary
base_weight = np.ones((1, n_channels)) * init_weight
# arbitrary per-channel LTVs in [20, 50] (np.random.randint excludes the upper bound)
ltv = np.random.randint(low=20, high=51, size=n_channels)

total_response = np.empty(n_iters)
revs = np.empty(n_iters)
# for 2-steps
budget_ratios = np.linspace(0, 1, n_iters)

# suppress adstock for testing
init_max_adstock = model.get_max_adstock()
if adstock_off and model.get_max_adstock() > 0:
model.adstock_df = None

model.raw_df = model.raw_df.loc[init_max_adstock:, :].reset_index(drop=True)
df = model.get_raw_df()

# turn-off info
logger = logging.getLogger("karpiu-planning-test")
logger.setLevel(30)

for idx, x in enumerate(tqdm(budget_ratios)):
budget_vector = np.array([[x], [1 - x]])
budget_matrix = budget_vector * base_weight
# print(budget_matrix)
spend_df = df.copy()
spend_df.loc[
(spend_df[date_col] >= budget_start) & (spend_df[date_col] <= budget_end),
channels,
] = budget_matrix

attributor = AttributorGamma(
model=model,
df=spend_df,
start=budget_start,
end=budget_end,
logger=logger,
)
_, spend_attr, _, _ = attributor.make_attribution()
revs[idx] = np.sum(spend_attr.loc[:, channels].values * ltv)

pred_df = model.predict(spend_df, decompose=True)
msh = MMMShell(model)
# note that this is un-normalized comp; not equal to final marketing attribution
paid_arr = pred_df.loc[
(pred_df[date_col] >= budget_start) & (pred_df[date_col] <= budget_end),
"paid",
].values
organic_attr_arr = msh.attr_organic[
(pred_df[date_col] >= budget_start) & (pred_df[date_col] <= budget_end)
]
total_response[idx] = np.sum(organic_attr_arr * np.exp(paid_arr))

return budget_ratios, revs, total_response
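
The new two_steps_optim_check helper above sweeps how the spend of a two-day window starting at budget_start is split between its two days and returns, for each split ratio, the LTV-weighted attributed revenue and the total response. A minimal usage sketch of the diagnostic module follows; it assumes an already-fitted karpiu.models.MMM instance named model, and the budget date is purely illustrative.

import matplotlib.pyplot as plt

from karpiu.diagnostic import (
    check_convergence,
    check_residuals,
    check_stationarity,
    two_steps_optim_check,
)

# model is assumed to be an already-fitted karpiu.models.MMM instance
check_residuals(model)      # residual plots
check_stationarity(model)   # ADF test and Durbin-Watson statistic
check_convergence(model)    # trace plots of the posterior samples

# sweep the split of a two-day budget window (the date is illustrative)
budget_ratios, revs, total_response = two_steps_optim_check(
    model,
    budget_start="2022-01-01",
    n_iters=10,
    adstock_off=True,
)

# revs uses randomly drawn per-channel LTVs inside the helper,
# so only the shape of the curve is meaningful here
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
axes[0].plot(budget_ratios, revs)
axes[0].set_title("LTV-weighted attributed revenue")
axes[1].plot(budget_ratios, total_response)
axes[1].set_title("total response")
for ax in axes:
    ax.set_xlabel("share of spend on the first day")
fig.tight_layout()
plt.show()
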
1 change: 1 addition & 0 deletions karpiu/explainability/__init__.py
@@ -1,2 +1,3 @@
from .attribution_alpha import AttributorAlpha
from .attribution_beta import AttributorBeta
from .attribution_gamma import AttributorGamma
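
With AttributorGamma now exported from karpiu.explainability (alongside the older Alpha and Beta attributors), an attribution run can be set up directly, mirroring the call inside two_steps_optim_check above. A minimal hedged sketch; the fitted model variable and the attribution window are assumptions for illustration.

import logging

from karpiu.explainability import AttributorGamma

# model is assumed to be an already-fitted karpiu.models.MMM instance;
# the attribution window below is illustrative
attributor = AttributorGamma(
    model=model,
    df=model.get_raw_df(),
    start="2022-01-01",
    end="2022-01-31",
    logger=logging.getLogger("karpiu-attribution-example"),
)

# make_attribution returns four objects; the second one carries the
# per-channel spend attribution used by two_steps_optim_check above
_, spend_attr, _, _ = attributor.make_attribution()
print(spend_attr)
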
9 changes: 7 additions & 2 deletions karpiu/explainability/attribution_beta.py
@@ -5,12 +5,12 @@
from typing import Optional, Tuple, List

from ..utils import adstock_process
from ..model_shell import MMMShell
from ..model_shell import MMMShellLegacy
from ..models import MMM
from .functions import make_attribution_numpy_beta


class AttributorBeta(MMMShell):
class AttributorBeta(MMMShellLegacy):
def __init__(
self,
model: MMM,
@@ -38,6 +38,11 @@ def __init__(
else:
self.logger = logger

self.logger.warning(
"This is the Beta version of the attribution class. Be aware that it may be deprecated in a future version. "
"For future support, please use AttributorGamma instead."
)

# for debug
self.delta_matrix = None
