Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sensitivity check #29

Merged
merged 5 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions examples/3 - sensitivity check.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "a7cab1e4-d0c2-4a4e-88b6-137eaecc4b82",
"metadata": {},
"outputs": [],
"source": [
"import synthx as sx"
]
},
{
"cell_type": "markdown",
"id": "029ac6dc-bb5b-41e9-b97d-fd5487eb1e72",
"metadata": {},
"source": [
"### sensitivity check"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ec0c5ce1-cbd1-4d8c-bc92-fdc06bfc658d",
"metadata": {},
"outputs": [],
"source": [
"df = sx.sample(\n",
" n_units=20,\n",
" n_time=50,\n",
" n_observed_covariates=3,\n",
" n_unobserved_covariates=10,\n",
" intervention_units=1,\n",
" intervention_time=40,\n",
" intervention_effect=1,\n",
" noise_effect=0.5,\n",
" scale=2,\n",
" seed=42,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c818129e-cbb5-429e-a4dc-584bde7b18b5",
"metadata": {},
"outputs": [],
"source": [
"dataset = sx.Dataset(\n",
" df,\n",
" unit_column = 'unit',\n",
" time_column = 'time',\n",
" y_column = 'y',\n",
" covariate_columns = ['covariate_1', 'covariate_2', 'covariate_3'],\n",
" intervention_units=1,\n",
" intervention_time=40,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a5e959c6-0dae-4e58-ac6f-39c815435b45",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 163.48it/s]\n"
]
}
],
"source": [
"effect_test, effects_placebo, sc_test, scs_placebo = sx.placebo_test(dataset)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "faadcd70-376e-4654-9597-1b177eb16121",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.3800761193284167"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sx.stats.calc_p_value(effect_test, effects_placebo)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3c5ff52c-c850-430f-81ad-9fc1e3ac1d25",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"uplift: 1.0, p value: 0.3800761193284167. \n",
"uplift: 1.01, p value: 0.2294051992220436. \n",
"uplift: 1.02, p value: 0.1296003326679382. \n",
"uplift: 1.03, p value: 0.06916294574846259. \n",
"uplift: 1.04, p value: 0.03522837222393027. \n",
"uplift: 1.05, p value: 0.017306846430708796. \n",
" 2%|██▊ | 5/200 [00:02<01:26, 2.24it/s]\n"
]
},
{
"data": {
"text/plain": [
"1.05"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sx.sensitivity_check(dataset, effects_placebo)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "83f8b813-2fea-4c2b-abd0-2e0c79315364",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "synthx"
version = "0.3.2"
version = "0.4.0"
description = "A Python Library for Advanced Synthetic Control Analysis"
authors = ["kenki931128 <kenki.nkmr@gmail.com>"]
license = "MIT License"
Expand Down
3 changes: 2 additions & 1 deletion synthx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from synthx.core.dataset import Dataset
from synthx.core.result import SyntheticControlResult
from synthx.core.sample import sample
from synthx.method import placebo_test, synthetic_control
from synthx.method import placebo_test, sensitivity_check, synthetic_control


__all__ = [
Expand All @@ -13,5 +13,6 @@
'sample',
'synthetic_control',
'placebo_test',
'sensitivity_check',
'SyntheticControlResult',
]
54 changes: 54 additions & 0 deletions synthx/method.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Optional

import numpy as np
import polars as pl
import scipy.optimize
from joblib import Parallel, delayed
from tqdm import tqdm
Expand Down Expand Up @@ -193,3 +194,56 @@ def process_placebo(
scs_placebo.append(sc_placebo)

return effect_test, effects_placebo, sc_test, scs_placebo


def sensitivity_check(
    dataset: sx.Dataset,
    effects_placebo: list[float],
    p_value_target: float = 0.03,
    uplift_start: float = 1.0,
    uplift_end: float = 3.0,
    uplift_step: float = 0.01,
) -> Optional[float]:
    """Find the smallest synthetic uplift that becomes statistically significant.

    Multiplies the outcome of the intervention units (post-intervention only) by
    progressively larger uplift factors, re-runs the synthetic control estimation
    for each factor, and returns the first uplift whose effect, compared against
    the placebo distribution, reaches the target p-value.

    Args:
        dataset (sx.Dataset): The dataset for the synthetic control analysis.
        effects_placebo (list[float]): The list of placebo effects estimated.
        p_value_target (float, optional): The target p-value threshold for
            statistical significance. Defaults to 0.03.
        uplift_start (float, optional): First uplift factor to try. Defaults to 1.0.
        uplift_end (float, optional): Exclusive upper bound of the sweep. Defaults to 3.0.
        uplift_step (float, optional): Increment between candidate uplifts. Defaults to 0.01.

    Returns:
        float or None: The uplift which becomes statistically significant, or
        None if no uplift in the sweep range reaches the target p-value.
    """
    df = dataset.data

    for uplift in tqdm(np.arange(uplift_start, uplift_end, uplift_step)):
        # Apply the uplift only to intervention units at/after the intervention time.
        df_sensitivity = df.with_columns(
            pl.when(
                pl.col(dataset.unit_column).is_in(dataset.intervention_units)
                & (pl.col(dataset.time_column) >= dataset.intervention_time)
            )
            .then(pl.col(dataset.y_column) * uplift)
            .otherwise(pl.col(dataset.y_column))
            # Bug fix: previously hard-coded '.alias("y")', which silently left the
            # real outcome column untouched whenever dataset.y_column != 'y'.
            .alias(dataset.y_column)
        )

        # Rebuild a Dataset around the uplifted outcome; all other columns/settings
        # are inherited from the original dataset.
        dataset_sensitivity = sx.Dataset(
            df_sensitivity,
            unit_column=dataset.unit_column,
            time_column=dataset.time_column,
            y_column=dataset.y_column,
            covariate_columns=dataset.covariate_columns,
            intervention_units=dataset.intervention_units,
            intervention_time=dataset.intervention_time,
        )

        try:
            sc = synthetic_control(dataset_sensitivity)
        except NoFeasibleModelError:
            # Skip this uplift rather than abort the whole sweep: the optimizer can
            # fail for individual uplift values while succeeding for others.
            tqdm.write(
                f'sensitivity synthetic control optimization failed: uplift {uplift}.',
                file=sys.stderr,
            )
            continue

        p_value = sx.stats.calc_p_value(sc.estimate_effects(), effects_placebo)
        tqdm.write(f'uplift: {uplift}, p value: {p_value}.', file=sys.stderr)
        if p_value <= p_value_target:
            return uplift

    # No uplift in [uplift_start, uplift_end) reached significance.
    return None
65 changes: 64 additions & 1 deletion tests/test_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import synthx as sx
from synthx.errors import NoFeasibleModelError
from synthx.method import synthetic_control
from synthx.method import sensitivity_check, synthetic_control


class TestSyntheticControl:
Expand Down Expand Up @@ -97,3 +97,66 @@ def test_placebo_test_number_of_units(self, dummy_dataset: sx.Dataset) -> None:
)
assert len(effects_placebo) == len(control_units)
assert len(scs_placebo) == len(control_units)


class TestSensitivityCheck:
    @pytest.fixture
    def dummy_dataset(self) -> sx.Dataset:
        """Build a tiny 3-unit x 3-period panel for exercising sensitivity_check."""
        frame = pl.DataFrame(
            {
                'unit': [1, 1, 1, 2, 2, 2, 3, 3, 3],
                'time': [1, 2, 3] * 3,
                'y': [float(v) for v in range(1, 10)],
                'cov1': [v / 10 for v in range(1, 10)],
            }
        )
        return sx.Dataset(
            data=frame,
            unit_column='unit',
            time_column='time',
            y_column='y',
            covariate_columns=['cov1'],
            intervention_units=[1],
            intervention_time=2,
        )

    def test_sensitivity_check_uplift_found(
        self, dummy_dataset: sx.Dataset, mocker: MockerFixture
    ) -> None:
        # First p-value is above the target, second is below -> second uplift wins.
        fake_result = mocker.Mock(estimate_effects=mocker.Mock(return_value=1.5))
        mocker.patch('synthx.method.synthetic_control', return_value=fake_result)
        mocker.patch('synthx.stats.calc_p_value', side_effect=[0.05, 0.02])

        found = sensitivity_check(
            dummy_dataset, effects_placebo=[1.0, 1.1, 1.2], p_value_target=0.03
        )

        assert found == 1.01

    def test_sensitivity_check_no_uplift_found(
        self, dummy_dataset: sx.Dataset, mocker: MockerFixture
    ) -> None:
        # Every p-value stays above the target -> the sweep exhausts and yields None.
        fake_result = mocker.Mock(estimate_effects=mocker.Mock(return_value=1.5))
        mocker.patch('synthx.method.synthetic_control', return_value=fake_result)
        mocker.patch('synthx.stats.calc_p_value', return_value=0.1)

        found = sensitivity_check(
            dummy_dataset, effects_placebo=[1.0, 1.1, 1.2], p_value_target=0.03
        )

        assert found is None

    def test_sensitivity_check_optimization_failure(
        self, dummy_dataset: sx.Dataset, mocker: MockerFixture
    ) -> None:
        # Optimization always fails -> every uplift is skipped and None is returned.
        mocker.patch('synthx.method.synthetic_control', side_effect=NoFeasibleModelError)

        found = sensitivity_check(
            dummy_dataset, effects_placebo=[1.0, 1.1, 1.2], p_value_target=0.03
        )

        assert found is None