From cc9698272da99b5112d48720b2f4898e2a2170d2 Mon Sep 17 00:00:00 2001 From: kenki931128 Date: Tue, 23 Apr 2024 00:08:31 +0800 Subject: [PATCH 1/5] feat: sensitivity check --- synthx/method.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/synthx/method.py b/synthx/method.py index badf845..ad6d843 100644 --- a/synthx/method.py +++ b/synthx/method.py @@ -4,6 +4,7 @@ from typing import Optional import numpy as np +import polars as pl import scipy.optimize from joblib import Parallel, delayed from tqdm import tqdm @@ -193,3 +194,56 @@ def process_placebo( scs_placebo.append(sc_placebo) return effect_test, effects_placebo, sc_test, scs_placebo + + +def sensitivity_check( + dataset: sx.Dataset, effects_placebo: list[float], p_value_target: float = 0.03 +) -> Optional[float]: + """Perform a sensitivity check on the synthetic control results. + + Args: + dataset (sx.Dataset): The dataset for the synthetic control analysis. + effects_placebo (list[float]): The list of placebo effects estimated. + p_value_target (float, optional): The target p-value threshold for statistical significance. + + Returns: + float or None: The uplift which becomes statistically significant. + """ + df = dataset.data + + for uplift in tqdm(np.arange(1, 3, 0.01)): + df_sensitivity = df.with_columns( + pl.when( + pl.col(dataset.unit_column).is_in(dataset.intervention_units) + & (pl.col(dataset.time_column) >= dataset.intervention_time) + ) + .then(pl.col(dataset.y_column) * uplift) + .otherwise(pl.col(dataset.y_column)) + .alias('y') + ) + + dataset_sensitivity = sx.Dataset( + df_sensitivity, + unit_column=dataset.unit_column, + time_column=dataset.time_column, + y_column=dataset.y_column, + covariate_columns=dataset.covariate_columns, + intervention_units=dataset.intervention_units, + intervention_time=dataset.intervention_time, + ) + + try: + sc = synthetic_control(dataset_sensitivity) + except NoFeasibleModelError: + tqdm.write( + f'sensitivity synthetic control optimization failed: uplift {uplift}.', + file=sys.stderr, + ) + continue + + p_value = sx.stats.calc_p_value(sc.estimate_effects(), effects_placebo) + tqdm.write(f'uplift: {uplift}, p value: {p_value}.', file=sys.stderr) + if p_value <= p_value_target: + return uplift + + return None From b4a693ecd799d7b047c98837e16a9f8dcff75f88 Mon Sep 17 00:00:00 2001 From: kenki931128 Date: Tue, 23 Apr 2024 00:08:50 +0800 Subject: [PATCH 2/5] test: sensitivity check --- tests/test_method.py | 65 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/tests/test_method.py b/tests/test_method.py index 353dc2d..9f6116f 100644 --- a/tests/test_method.py +++ b/tests/test_method.py @@ -8,7 +8,7 @@ import synthx as sx from synthx.errors import NoFeasibleModelError -from synthx.method import synthetic_control +from synthx.method import sensitivity_check, synthetic_control class TestSyntheticControl: @@ -97,3 +97,66 @@ def test_placebo_test_number_of_units(self, dummy_dataset: sx.Dataset) -> None: ) assert len(effects_placebo) == len(control_units) assert len(scs_placebo) == len(control_units) + + +class TestSensitivityCheck: + @pytest.fixture + def dummy_dataset(self) -> sx.Dataset: + data = pl.DataFrame( + { + 'unit': [1, 1, 1, 2, 2, 2, 3, 3, 3], + 'time': [1, 2, 3, 1, 2, 3, 1, 2, 3], + 'y': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], + 'cov1': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], + } + ) + return sx.Dataset( + data=data, + unit_column='unit', + time_column='time', + y_column='y', + covariate_columns=['cov1'], + intervention_units=[1], + intervention_time=2, + ) + + def test_sensitivity_check_uplift_found( + self, dummy_dataset: sx.Dataset, mocker: MockerFixture + ) -> None: + mocker.patch( + 'synthx.method.synthetic_control', + return_value=mocker.Mock(estimate_effects=mocker.Mock(return_value=1.5)), + ) + mocker.patch('synthx.stats.calc_p_value', side_effect=[0.05, 0.02]) + + uplift = sensitivity_check( + dummy_dataset, effects_placebo=[1.0, 1.1, 1.2], p_value_target=0.03 + ) + + assert uplift == 1.01 + + def test_sensitivity_check_no_uplift_found( + self, dummy_dataset: sx.Dataset, mocker: MockerFixture + ) -> None: + mocker.patch( + 'synthx.method.synthetic_control', + return_value=mocker.Mock(estimate_effects=mocker.Mock(return_value=1.5)), + ) + mocker.patch('synthx.stats.calc_p_value', return_value=0.1) + + uplift = sensitivity_check( + dummy_dataset, effects_placebo=[1.0, 1.1, 1.2], p_value_target=0.03 + ) + + assert uplift is None + + def test_sensitivity_check_optimization_failure( + self, dummy_dataset: sx.Dataset, mocker: MockerFixture + ) -> None: + mocker.patch('synthx.method.synthetic_control', side_effect=NoFeasibleModelError) + + uplift = sensitivity_check( + dummy_dataset, effects_placebo=[1.0, 1.1, 1.2], p_value_target=0.03 + ) + + assert uplift is None From ef6f4e11aa73cebf0e51b19c85063daa60c1a84d Mon Sep 17 00:00:00 2001 From: kenki931128 Date: Tue, 23 Apr 2024 00:09:28 +0800 Subject: [PATCH 3/5] feat: sensitivity check --- synthx/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synthx/__init__.py b/synthx/__init__.py index 7bec886..fc2b9c6 100644 --- a/synthx/__init__.py +++ b/synthx/__init__.py @@ -4,7 +4,7 @@ from synthx.core.dataset import Dataset from synthx.core.result import SyntheticControlResult from synthx.core.sample import sample -from synthx.method import placebo_test, synthetic_control +from synthx.method import placebo_test, sensitivity_check, synthetic_control __all__ = [ @@ -13,5 +13,6 @@ 'sample', 'synthetic_control', 'placebo_test', + 'sensitivity_check', 'SyntheticControlResult', ] From 3db470dfe13a57ce009d3a86ce7b6d9c12dfd578 Mon Sep 17 00:00:00 2001 From: kenki931128 Date: Tue, 23 Apr 2024 00:09:52 +0800 Subject: [PATCH 4/5] example: sensitivity check --- examples/3 - sensitivity check.ipynb | 163 +++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 examples/3 - sensitivity check.ipynb diff --git a/examples/3 - sensitivity check.ipynb b/examples/3 - sensitivity check.ipynb new file mode 100644 index 0000000..11bbc38 --- /dev/null +++ b/examples/3 - sensitivity check.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "a7cab1e4-d0c2-4a4e-88b6-137eaecc4b82", + "metadata": {}, + "outputs": [], + "source": [ + "import synthx as sx" + ] + }, + { + "cell_type": "markdown", + "id": "029ac6dc-bb5b-41e9-b97d-fd5487eb1e72", + "metadata": {}, + "source": [ + "### sensitivity check" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ec0c5ce1-cbd1-4d8c-bc92-fdc06bfc658d", + "metadata": {}, + "outputs": [], + "source": [ + "df = sx.sample(\n", + " n_units=20,\n", + " n_time=50,\n", + " n_observed_covariates=3,\n", + " n_unobserved_covariates=10,\n", + " intervention_units=1,\n", + " intervention_time=40,\n", + " intervention_effect=1,\n", + " noise_effect=0.5,\n", + " scale=2,\n", + " seed=42,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c818129e-cbb5-429e-a4dc-584bde7b18b5", + "metadata": {}, + "outputs": [], + "source": [ + "dataset = sx.Dataset(\n", + " df,\n", + " unit_column = 'unit',\n", + " time_column = 'time',\n", + " y_column = 'y',\n", + " covariate_columns = ['covariate_1', 'covariate_2', 'covariate_3'],\n", + " intervention_units=1,\n", + " intervention_time=40,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a5e959c6-0dae-4e58-ac6f-39c815435b45", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 163.48it/s]\n" + ] + } + ], + "source": [ + "effect_test, effects_placebo, sc_test, scs_placebo = sx.placebo_test(dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "faadcd70-376e-4654-9597-1b177eb16121", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.3800761193284167" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sx.stats.calc_p_value(effect_test, effects_placebo)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3c5ff52c-c850-430f-81ad-9fc1e3ac1d25", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "uplift: 1.0, p value: 0.3800761193284167. \n", + "uplift: 1.01, p value: 0.2294051992220436. \n", + "uplift: 1.02, p value: 0.1296003326679382. \n", + "uplift: 1.03, p value: 0.06916294574846259. \n", + "uplift: 1.04, p value: 0.03522837222393027. \n", + "uplift: 1.05, p value: 0.017306846430708796. \n", + " 2%|██▊ | 5/200 [00:02<01:26, 2.24it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "1.05" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sx.sensitivity_check(dataset, effects_placebo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83f8b813-2fea-4c2b-abd0-2e0c79315364", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 5cef517f542bbae69d6b2cd5726842dcfece925f Mon Sep 17 00:00:00 2001 From: kenki931128 Date: Tue, 23 Apr 2024 00:10:07 +0800 Subject: [PATCH 5/5] ci: v0.4.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 781fd66..c86af93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "synthx" -version = "0.3.2" +version = "0.4.0" description = "A Python Library for Advanced Synthetic Control Analysis" authors = ["kenki931128 "] license = "MIT License"