From 592f718216c43850ca73d44156c4b1dbd000185e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Czy=C5=BC?=
Date: Wed, 13 Mar 2024 17:24:45 +0100
Subject: [PATCH] Clean up redundant files (#24)

* Remove redundant scripts.

* Update README
---
 .gitignore                              |   2 +
 README.md                               |   2 -
 requirements.txt                        |   3 +-
 scripts/design_categorical.py           | 156 ---------------
 scripts/experiment1/1-1.py              |  32 ---
 scripts/experiment1/1-2.py              |  33 ---
 scripts/experiment1/1-3.py              |  33 ---
 scripts/experiment1/plot_figure.py      |  97 ---------
 scripts/experiment_external_dataset.py  | 134 -------------
 scripts/experiment_external_dataset2.py | 179 -----------------
 scripts/experiment_gaussian.py          | 144 -------------
 scripts/run_categorical.py              | 256 ------------------------
 12 files changed, 4 insertions(+), 1067 deletions(-)
 delete mode 100644 scripts/design_categorical.py
 delete mode 100644 scripts/experiment1/1-1.py
 delete mode 100644 scripts/experiment1/1-2.py
 delete mode 100644 scripts/experiment1/1-3.py
 delete mode 100644 scripts/experiment1/plot_figure.py
 delete mode 100644 scripts/experiment_external_dataset.py
 delete mode 100644 scripts/experiment_external_dataset2.py
 delete mode 100644 scripts/experiment_gaussian.py
 delete mode 100644 scripts/run_categorical.py

diff --git a/.gitignore b/.gitignore
index 5b6e898..260a381 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
 *.nb
 *.ipynb
 
+data/Darmanis
+
 # Directories for local files
 local/
 private/
diff --git a/README.md b/README.md
index 975b049..6a3a324 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,3 @@
-![build](https://github.com/labelshift/labelshift/actions/workflows/build.yml/badge.svg)
-
 # Label Shift
 
 Python library for *quantification* (estimating the class prevalence in an unlabeled data set) under the prior probability shift assumption.
diff --git a/requirements.txt b/requirements.txt
index 0aa9708..4446c8f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,10 @@
 arviz
+matplotlib
 numpy
-petname
 pydantic
 scikit-learn
 scipy
+subplots_from_axsize
 # Code quality tools
 black
 flake8
diff --git a/scripts/design_categorical.py b/scripts/design_categorical.py
deleted file mode 100644
index 6e0ce90..0000000
--- a/scripts/design_categorical.py
+++ /dev/null
@@ -1,156 +0,0 @@
-"""Experimental design for the categorical experiment.
-
-Use it to generate a list of commands to be run."""
-from pathlib import Path
-from typing import Optional
-
-DIRECTORY = Path("data/generated/categorical_experiment")
-
-ESTIMATOR_CONFIGURATIONS = {
-    "MAP-1": "--algorithm MAP --bayesian-alpha 1",
-    "MAP-2": "--algorithm MAP --bayesian-alpha 2",
-    "CC": "--algorithm CC",
-    "IR": "--algorithm IR --restricted true",
-    "BBSE": "--algorithm BBSE",
-}
-
-N_SEEDS: int = 2
-
-N_LABELED: int = 1_000
-N_UNLABELED: int = 500
-QUALITY_LABELED: float = 0.85
-PI_UNLABELED: float = 0.7
-L: int = 5
-K: int = 5
-
-
-def command(
-    estimator_key: str,
-    seed: int,
-    output_dir: Path,
-    n_y: int = L,
-    n_c: int = K,
-    n_labeled: int = N_LABELED,
-    n_unlabeled: int = N_UNLABELED,
-    quality_labeled: float = QUALITY_LABELED,
-    quality_unlabeled: Optional[float] = None,
-    pi_unlabeled: float = PI_UNLABELED,
-) -> str:
-    estimator_args = ESTIMATOR_CONFIGURATIONS[estimator_key]
-
-    quality_unlabeled_str = (
-        "" if quality_unlabeled is None else f"--quality-unlabeled {quality_unlabeled}"
-    )
-
-    print(
-        f"python scripts/run_categorical.py "
-        f"--n-labeled {n_labeled} --n-unlabeled {n_unlabeled} "
-        f"--quality {quality_labeled} {quality_unlabeled_str} "
-        f"--prevalence-unlabeled {pi_unlabeled} "
-        f"--seed {seed} "
-        f"--output-dir {output_dir} "
-        f"--K {n_y} --L {n_c} "
-        f"--tag {estimator_key} {estimator_args}"
-    )
-
-
-def experiment_change_prevalence() -> None:
-    """Fix L = K = 5 and change pi'_1."""
-    for seed in range(N_SEEDS):
-        for pi_unlabeled in [0.5, 0.6, 0.7, 0.8, 0.9]:
-            for algorithm in ESTIMATOR_CONFIGURATIONS.keys():
-                output_dir = (
-                    DIRECTORY / "change_prevalence" / f"{algorithm}-{pi_unlabeled}"
-                )
-                command(
-                    output_dir=output_dir,
-                    pi_unlabeled=pi_unlabeled,
-                    seed=seed,
-                    estimator_key=algorithm,
-                )
-
-
-def experiment_change_n_unlabeled() -> None:
-    """Change N'."""
-    for seed in range(N_SEEDS):
-        for n_unlabeled in [10, 50, 100, 500, 1000, 10000]:
-            for algorithm in ESTIMATOR_CONFIGURATIONS.keys():
-                output_dir = (
-                    DIRECTORY / "change_n_unlabeled" / f"{algorithm}-{n_unlabeled}"
-                )
-                command(
-                    n_unlabeled=n_unlabeled,
-                    seed=seed,
-                    estimator_key=algorithm,
-                    output_dir=output_dir,
-                )
-
-
-def experiment_change_k() -> None:
-    """Change K, keeping L fixed."""
-    for seed in range(N_SEEDS):
-        for n_c in [2, 3, 5, 7, 9]:
-            for algorithm in ESTIMATOR_CONFIGURATIONS.keys():
-                output_dir = DIRECTORY / "change_k" / f"{algorithm}-{n_c}"
-                command(
-                    seed=seed,
-                    output_dir=output_dir,
-                    estimator_key=algorithm,
-                    n_c=n_c,
-                )
-
-
-def experiment_change_jointly_l_and_k() -> None:
-    """Jointly change L = K."""
-    for seed in range(N_SEEDS):
-        for lk in [2, 3, 5, 7, 9, 10]:
-            for algorithm in ESTIMATOR_CONFIGURATIONS.keys():
-                output_dir = DIRECTORY / "change_jointly_lk" / f"{algorithm}-{lk}"
-                command(
-                    seed=seed,
-                    estimator_key=algorithm,
-                    output_dir=output_dir,
-                    n_c=lk,
-                    n_y=lk,
-                )
-
-
-def experiment_change_quality() -> None:
-    """Change quality."""
-    for seed in range(N_SEEDS):
-        for quality in [0.55, 0.65, 0.75, 0.85, 0.95]:
-            for algorithm in ESTIMATOR_CONFIGURATIONS.keys():
-                output_dir = DIRECTORY / "change_quality" / f"{algorithm}-{quality}"
-                command(
-                    quality_labeled=quality,
-                    seed=seed,
-                    estimator_key=algorithm,
-                    output_dir=output_dir,
-                )
-
-
-def experiment_misspecified() -> None:
-    """Change quality in the unlabeled population, so that the model is misspecified."""
-    for seed in range(N_SEEDS):
-        for quality_prime in [0.45, 0.55, 0.65, 0.75, 0.80, 0.85, 0.90, 0.95]:
-            for algorithm in ESTIMATOR_CONFIGURATIONS.keys():
-                output_dir = DIRECTORY / "misspecified" / f"{algorithm}-{quality_prime}"
-                command(
-                    quality_unlabeled=quality_prime,
-                    seed=seed,
-                    output_dir=output_dir,
-                    estimator_key=algorithm,
-                )
-
-
-def main() -> None:
-    experiment_change_prevalence()
-    experiment_change_n_unlabeled()
-    experiment_change_quality()
-    experiment_change_jointly_l_and_k()
-    experiment_change_k()
-    experiment_misspecified()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/experiment1/1-1.py b/scripts/experiment1/1-1.py
deleted file mode 100644
index faf0d71..0000000
--- a/scripts/experiment1/1-1.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
-Fixed q = 0.85, N' = 500 and changed the prevalence π'1 in range {0.5, 0.6, 0.7, 0.8, 0.9}.
-"""
-algorithms = [
-    "ClassifyAndCount",
-    "RatioEstimator",
-    "BlackBoxShiftEstimator",
-    "BayesianMAP",
-]
-
-
-def main() -> None:
-    n_labeled = 1000
-    n_unlabeled = 500
-    quality = 0.85
-    n_seeds = 30
-
-    pi_labeled = 0.5
-
-    for pi_unlabeled in [0.5, 0.6, 0.7, 0.8, 0.9]:
-        for seed in range(n_seeds):
-            for algorithm in algorithms:
-                try:
-                    output_dir = f"experiment1-1/{algorithm}"
-                    command = f"python scripts/experiment_categorical.py --n-labeled {n_labeled} --n-unlabeled {n_unlabeled} --quality {quality} --pi-labeled {pi_labeled} --pi-unlabeled {pi_unlabeled} --seed {seed} --algorithm {algorithm} --output-dir {output_dir}"
-                    print(command)
-                except Exception as e:
-                    print(e)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/experiment1/1-2.py b/scripts/experiment1/1-2.py
deleted file mode 100644
index 9a924d0..0000000
--- a/scripts/experiment1/1-2.py
+++ /dev/null
@@ -1,33 +0,0 @@
-"""
-Fixed q = 0.85, π′1 = 0.7 and changed N' in range
-{10, 50, 100, 500, 1000, 10000}
-"""
-algorithms = [
-    "ClassifyAndCount",
-    "RatioEstimator",
-    "BlackBoxShiftEstimator",
-    "BayesianMAP",
-]
-
-
-def main() -> None:
-    n_labeled = 1000
-    quality = 0.85
-    n_seeds = 30
-
-    pi_unlabeled = 0.7
-    pi_labeled = 0.5
-
-    for n_unlabeled in [10, 50, 100, 500, 1000, 10000]:
-        for seed in range(n_seeds):
-            for algorithm in algorithms:
-                try:
-                    output_dir = f"experiment1-2/{algorithm}"
-                    command = f"python scripts/experiment_categorical.py --n-labeled {n_labeled} --n-unlabeled {n_unlabeled} --quality {quality} --pi-labeled {pi_labeled} --pi-unlabeled {pi_unlabeled} --seed {seed} --algorithm {algorithm} --output-dir {output_dir}"
-                    print(command)
-                except Exception as e:
-                    print(e)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/experiment1/1-3.py b/scripts/experiment1/1-3.py
deleted file mode 100644
index 3389f8b..0000000
--- a/scripts/experiment1/1-3.py
+++ /dev/null
@@ -1,33 +0,0 @@
-"""
-Fixed π′1 = 0.7, N'= 500 and changed q in range
-{0.55, 0.65, 0.75, 0.85, 0.95}
-"""
-algorithms = [
-    "ClassifyAndCount",
-    "RatioEstimator",
-    "BlackBoxShiftEstimator",
-    "BayesianMAP",
-]
-
-
-def main() -> None:
-    n_labeled = 1000
-    n_unlabeled = 500
-    n_seeds = 30
-
-    pi_unlabeled = 0.7
-    pi_labeled = 0.5
-
-    for quality in [0.55, 0.65, 0.75, 0.85, 0.95]:
-        for seed in range(n_seeds):
-            for algorithm in algorithms:
-                try:
-                    output_dir = f"experiment1-3/{algorithm}"
-                    command = f"python scripts/experiment_categorical.py --n-labeled {n_labeled} --n-unlabeled {n_unlabeled} --quality {quality} --pi-labeled {pi_labeled} --pi-unlabeled {pi_unlabeled} --seed {seed} --algorithm {algorithm} --output-dir {output_dir}"
-                    print(command)
-                except Exception as e:
-                    print(e)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/experiment1/plot_figure.py b/scripts/experiment1/plot_figure.py
deleted file mode 100644
index d0aa63a..0000000
--- a/scripts/experiment1/plot_figure.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import json
-import string
-from pathlib import Path
-
-import seaborn as sns
-import matplotlib.pyplot as plt
-import pandas as pd
-
-
-rename_dict = {
-    "ClassifyAndCount": "CC",
-    "RatioEstimator": "IR",
-    "BlackBoxShiftEstimator": "BBSE",
-    "BayesianMAP": "MAP",
-}
-
-hue_order = [
-    "CC",
-    "IR",
-    "BBSE",
-    "MAP",
-]
-
-
-def file_to_row(file):
-    with open(file) as f:
-        x = json.load(f)
-    return {
-        "Algorithm": rename_dict[x["algorithm"]],
-        "true": x["true"][0],
-        "estimated": x["estimated"][0],
-        "quality": x["sampler"]["p_c_cond_y"][0][0],
-        "n_labeled": x["sampler"]["n_labeled"],
-        "n_unlabeled": x["sampler"]["n_unlabeled"],
-    }
-
-
-def experiment_directory_to_dataframe(experiment_directory) -> pd.DataFrame:
-    files = list(
-        Path(experiment_directory).rglob(
-            "*.json",
-        )
-    )
-    df = pd.DataFrame([file_to_row(f) for f in files])
-    df["error"] = df["estimated"] - df["true"]
-    return df
-
-
-def main() -> None:
-    fig, axs = plt.subplots(3, 1, figsize=(4, 12), sharey=False)
-
-    experiment1 = "experiment1-1"
-    df1 = experiment_directory_to_dataframe(experiment1)
-    sns.boxplot(
-        df1, x="true", y="error", hue="Algorithm", ax=axs[0], hue_order=hue_order
-    )
-    axs[0].set_xlabel(r"Prevalence $\pi'_1$")
-    axs[0].set_ylabel(r"Signed difference $\hat \pi'_1 - \pi'_1$")
-
-    experiment2 = "experiment1-2"
-    df2 = experiment_directory_to_dataframe(experiment2)
-    sns.boxplot(
-        df2, x="n_unlabeled", y="error", hue="Algorithm", ax=axs[1], hue_order=hue_order
-    )
-
-    axs[1].set_xlabel(r"Unlabeled data set size $N'$")
-    axs[1].set_ylabel(r"Signed difference $\hat \pi'_1 - \pi'_1$")
-    axs[1].legend([], [], frameon=False)
-
-    experiment3 = "experiment1-3"
-    df3 = experiment_directory_to_dataframe(experiment3)
-    sns.boxplot(
-        df3, x="quality", y="error", hue="Algorithm", ax=axs[2], hue_order=hue_order
-    )
-
-    axs[2].set_xlabel(r"Classifier quality $q$")
-    axs[2].set_ylabel(r"Signed difference $\hat \pi'_1 - \pi'_1$")
-    axs[2].legend([], [], frameon=False)
-
-    for n, ax in enumerate(axs):
-        ax.text(
-            -0.1,
-            1.1,
-            string.ascii_uppercase[n],
-            transform=ax.transAxes,
-            size=20,
-            weight="bold",
-        )
-
-    sns.move_legend(axs[0], "lower left")  # , bbox_to_anchor=(1, 1))
-
-    fig.tight_layout()
-    fig.savefig("experiment1.pdf")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/experiment_external_dataset.py b/scripts/experiment_external_dataset.py
deleted file mode 100644
index a5a892c..0000000
--- a/scripts/experiment_external_dataset.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import enum
-
-import numpy as np
-import sklearn.datasets
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.linear_model import LogisticRegression
-
-import labelshift.datasets.split as split
-import labelshift.summary_statistic as summ
-
-import labelshift.algorithms.api as algos
-import labelshift.algorithms.ratio_estimator as re
-from labelshift.algorithms.expectation_maximization import expectation_maximization
-
-
-class Algorithm(enum.Enum):
-    EM = "ExpectationMaximization"
-    CC = "ClassifyAndCount"
-    BBSE_HARD = "BBSE-Hard"
-    RATIO_HARD = "InvariantRatio-Hard"
-    BAYESIAN = "Bayesian-MAP"
-    RATIO_SOFT = "InvariantRatio-Soft"
-
-
-def get_estimate(
-    algorithm: Algorithm,
-    n_y_c_labeled: np.ndarray,
-    n_c_unlabeled: np.ndarray,
-    y_labeled: np.ndarray,
-    prob_c_labeled: np.ndarray,
-    prob_c_unlabeled: np.ndarray,
-    labeled_prevalence: np.ndarray,
-) -> np.ndarray:
-    """Function running the (point) prevalence estimator.
-
-    Args:
-        algorithm: estimator
-        n_y_c_labeled: matrix with counts of predictions and true values, shape (L, K)
-        n_c_unlabeled: vector with prediction counts on unlabeled data set, shape (K,)
-        y_labeled: true labels in the labeled data set, shape (N,)
-        prob_c_labeled: predictions of the classifier on the labeled data set, shape (N, K)
-        prob_c_unlabeled: predictions of the classifier on the unlabeled data set, shape (N', K)
-        labeled_prevalence: prevalence vector on the labeled distribution, shape (L,)
-    """
-    summary_statistic = algos.SummaryStatistic(
-        n_y_labeled=None, n_y_and_c_labeled=n_y_c_labeled, n_c_unlabeled=n_c_unlabeled
-    )
-
-    if algorithm == Algorithm.EM:
-        return expectation_maximization(
-            predictions=prob_c_unlabeled, training_prevalences=labeled_prevalence
-        )
-    elif algorithm == Algorithm.CC:
-        return algos.ClassifyAndCount().estimate_from_summary_statistic(
-            summary_statistic
-        )
-    elif algorithm == Algorithm.BBSE_HARD:
-        return algos.BlackBoxShiftEstimator(
-            p_y_labeled=labeled_prevalence
-        ).estimate_from_summary_statistic(summary_statistic)
-    elif algorithm == Algorithm.RATIO_HARD:
-        return algos.InvariantRatioEstimator(
-            restricted=True
-        ).estimate_from_summary_statistic(summary_statistic)
-    elif algorithm == Algorithm.BAYESIAN:
-        return algos.DiscreteCategoricalMAPEstimator().estimate_from_summary_statistic(
-            summary_statistic
-        )
-    elif algorithm == Algorithm.RATIO_SOFT:
-        return re.calculate_vector_and_matrix_from_predictions(
-            unlabeled_predictions=prob_c_unlabeled,
-            labeled_predictions=prob_c_labeled,
-            labeled_ground_truth=y_labeled,
-        )
-    else:
-        raise ValueError(f"Algorithm {algorithm} not recognized.")
-
-
-def main() -> None:
-    L = 2
-    K = L
-    dataset = sklearn.datasets.load_breast_cancer()
-    print(len(dataset.target))
-
-    random_seed: int = 22
-    n_training_examples: int = 200
-    n_labeled_examples: int = 100
-    n_unlabeled_examples: int = 150
-    prevalence_labeled: np.ndarray = np.ones(2) / 2
-    prevalence_unlabeled: np.ndarray = np.asarray([0.3, 0.7])
-
-    specification = split.SplitSpecification(
-        train=np.asarray(prevalence_labeled * n_training_examples, dtype=int).tolist(),
-        valid=np.asarray(prevalence_labeled * n_labeled_examples, dtype=int).tolist(),
-        test=np.asarray(
-            prevalence_unlabeled * n_unlabeled_examples, dtype=int
-        ).tolist(),
-    )
-
-    datasets = split.split_dataset(
-        dataset=dataset, specification=specification, random_seed=random_seed
-    )
-
-    classifier = DecisionTreeClassifier(random_state=random_seed + 1)
-    classifier = RandomForestClassifier(random_state=random_seed + 1)
-    classifier = LogisticRegression(random_state=random_seed + 1)
-    classifier.fit(datasets.train_x, datasets.train_y)
-
-    # The count values
-    n_y_c_labeled = summ.count_values_joint(
-        L, K, datasets.valid_y, classifier.predict(datasets.valid_x)
-    )
-    n_c_unlabeled = summ.count_values(K, classifier.predict(datasets.test_x))
-
-    labeled_probabilities = classifier.predict_proba(datasets.valid_x)
-    unlabeled_probabilities = classifier.predict_proba(datasets.test_x)
-
-    for alg in Algorithm:
-        print(alg)
-        estimate = get_estimate(
-            algorithm=alg,
-            n_y_c_labeled=n_y_c_labeled,
-            n_c_unlabeled=n_c_unlabeled,
-            y_labeled=datasets.valid_y,
-            prob_c_labeled=labeled_probabilities,
-            prob_c_unlabeled=unlabeled_probabilities,
-            labeled_prevalence=prevalence_labeled,
-        )
-        print(estimate)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/experiment_external_dataset2.py b/scripts/experiment_external_dataset2.py
deleted file mode 100644
index e143fef..0000000
--- a/scripts/experiment_external_dataset2.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import enum
-
-import arviz as az
-import matplotlib.pyplot as plt
-import numpy as np
-import pymc as pm
-import sklearn.datasets
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.linear_model import LogisticRegression
-
-import labelshift.datasets.split as split
-import labelshift.summary_statistic as summ
-
-import labelshift.algorithms.api as algos
-import labelshift.algorithms.ratio_estimator as re
-import labelshift.algorithms.bayesian_discrete as bay
-from labelshift.algorithms.expectation_maximization import expectation_maximization
-
-plt.rcParams.update({"font.size": 14})
-
-
-class Algorithm(enum.Enum):
-    EM = "EM"
-    CC = "CC"
-    BBSE_HARD = "BBSE"
-    RATIO_HARD = "IR: hard"
-    RATIO_SOFT = "IR: soft"
-
-
-def get_estimate(
-    algorithm: Algorithm,
-    n_y_c_labeled: np.ndarray,
-    n_c_unlabeled: np.ndarray,
-    y_labeled: np.ndarray,
-    prob_c_labeled: np.ndarray,
-    prob_c_unlabeled: np.ndarray,
-    labeled_prevalence: np.ndarray,
-) -> np.ndarray:
-    """Function running the (point) prevalence estimator.
-
-    Args:
-        algorithm: estimator
-        n_y_c_labeled: matrix with counts of predictions and true values, shape (L, K)
-        n_c_unlabeled: vector with prediction counts on unlabeled data set, shape (K,)
-        y_labeled: true labels in the labeled data set, shape (N,)
-        prob_c_labeled: predictions of the classifier on the labeled data set, shape (N, K)
-        prob_c_unlabeled: predictions of the classifier on the unlabeled data set, shape (N', K)
-        labeled_prevalence: prevalence vector on the labeled distribution, shape (L,)
-    """
-    summary_statistic = algos.SummaryStatistic(
-        n_y_labeled=None, n_y_and_c_labeled=n_y_c_labeled, n_c_unlabeled=n_c_unlabeled
-    )
-
-    if algorithm == Algorithm.EM:
-        return expectation_maximization(
-            predictions=prob_c_unlabeled, training_prevalences=labeled_prevalence
-        )
-    elif algorithm == Algorithm.CC:
-        return algos.ClassifyAndCount().estimate_from_summary_statistic(
-            summary_statistic
-        )
-    elif algorithm == Algorithm.BBSE_HARD:
-        return algos.BlackBoxShiftEstimator(
-            p_y_labeled=labeled_prevalence
-        ).estimate_from_summary_statistic(summary_statistic)
-    elif algorithm == Algorithm.RATIO_HARD:
-        return algos.InvariantRatioEstimator(
-            restricted=True
-        ).estimate_from_summary_statistic(summary_statistic)
-    elif algorithm == Algorithm.RATIO_SOFT:
-        return re.calculate_vector_and_matrix_from_predictions(
-            unlabeled_predictions=prob_c_unlabeled,
-            labeled_predictions=prob_c_labeled,
-            labeled_ground_truth=y_labeled,
-        )
-    else:
-        raise ValueError(f"Algorithm {algorithm} not recognized.")
-
-
-def main() -> None:
-    L = 2
-    K = L
-    dataset = sklearn.datasets.load_breast_cancer()
-    print(len(dataset.target))
-
-    ymax: float = 7.0
-    random_seed: int = 20
-    n_training_examples: int = 200
-    n_labeled_examples: int = 100
-    n_unlabeled_examples: int = 150
-    prevalence_labeled: np.ndarray = np.ones(2) / 2
-    prevalence_unlabeled: np.ndarray = np.asarray([0.3, 0.7])
-
-    specification = split.SplitSpecification(
-        train=np.asarray(prevalence_labeled * n_training_examples, dtype=int).tolist(),
-        valid=np.asarray(prevalence_labeled * n_labeled_examples, dtype=int).tolist(),
-        test=np.asarray(
-            prevalence_unlabeled * n_unlabeled_examples, dtype=int
-        ).tolist(),
-    )
-
-    datasets = split.split_dataset(
-        dataset=dataset, specification=specification, random_seed=random_seed
-    )
-
-    # classifier = DecisionTreeClassifier(random_state=random_seed + 1)
-    classifier = RandomForestClassifier(random_state=random_seed + 1)
-    # classifier = LogisticRegression(random_state=random_seed + 1)
-    classifier.fit(datasets.train_x, datasets.train_y)
-
-    # The count values
-    n_y_c_labeled = summ.count_values_joint(
-        L, K, datasets.valid_y, classifier.predict(datasets.valid_x)
-    )
-    n_c_unlabeled = summ.count_values(K, classifier.predict(datasets.test_x))
-
-    labeled_probabilities = classifier.predict_proba(datasets.valid_x)
-    unlabeled_probabilities = classifier.predict_proba(datasets.test_x)
-
-    with bay.build_model(
-        n_y_and_c_labeled=n_y_c_labeled,
-        n_c_unlabeled=n_c_unlabeled,
-    ):
-        idata = pm.sample()
-
-    fig, ax = plt.subplots(figsize=(6, 4))
-    _, ax_trash = plt.subplots()
-
-    az.plot_posterior(idata, ax=[ax, ax_trash], var_names=bay.P_TEST_Y)
-    ax.set_title(r"$\pi'_1$ posterior")
-
-    ax.vlines(
-        x=prevalence_unlabeled[0],
-        ymin=0,
-        ymax=ymax,
-        label="Ground truth",
-        colors=["k"],
-        linestyles=["--"],
-    )
-
-    linestyles = [
-        "dashdot",
-        (0, (1, 1)),
-        "solid",
-        "dashed",
-        (0, (3, 10, 1, 10)),
-    ]
-
-    for i, alg in enumerate(Algorithm):
-        print(alg)
-        estimate = get_estimate(
-            algorithm=alg,
-            n_y_c_labeled=n_y_c_labeled,
-            n_c_unlabeled=n_c_unlabeled,
-            y_labeled=datasets.valid_y,
-            prob_c_labeled=labeled_probabilities,
-            prob_c_unlabeled=unlabeled_probabilities,
-            labeled_prevalence=prevalence_labeled,
-        )
-
-        ax.vlines(
-            estimate[0],
-            ymin=0,
-            ymax=ymax,
-            label=alg.value,
-            colors=[f"C{i+2}"],
-            linestyles=[linestyles[i]],
-        )
-
-        print(estimate)
-
-    fig.legend()
-    fig.tight_layout()
-    fig.savefig("plot_cancer.pdf")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/experiment_gaussian.py b/scripts/experiment_gaussian.py
deleted file mode 100644
index 285980d..0000000
--- a/scripts/experiment_gaussian.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""This experiment plots the posterior in the Gaussian mixture model as well
-as a discretized version of that.
-"""
-import string
-from typing import List
-
-import arviz as az
-import matplotlib.pyplot as plt
-import numpy as np
-import pymc as pm
-import seaborn as sns
-
-import labelshift.partition as part
-import labelshift.summary_statistic as summ
-import labelshift.algorithms.bayesian_discrete as discrete
-
-
-plt.rcParams.update({"font.size": 22})
-
-
-def plot_distributions(
-    ax: plt.Axes,
-    X: np.ndarray,
-    X1: np.ndarray,
-    breakpoints: np.ndarray,
-    height: float = 1.0,
-) -> None:
-    """
-
-    Args:
-        ax: axes where to draw the plot
-        X: points from the labeled distribution, shape (n_labeled,)
-        X1: points from the unlabeled distribution, shape (n_unlabeled,)
-        breakpoints: breakpoints to be plotted, shape (n_breakpoints,)
-    """
-    sns.kdeplot(data=np.hstack(X), ax=ax)
-    sns.kdeplot(data=np.hstack(X1), ax=ax)
-
-    for bp in breakpoints:
-        ax.axvline(bp, ymax=height, linestyle="--", c="k", alpha=0.5)
-
-
-def gaussian_model(
-    labeled_data: List[np.ndarray], unlabeled_data: np.ndarray
-) -> pm.Model:
-    """
-    Args:
-        labeled_data: list of samples attributed to each Y:
-            [
-                [a1, ..., a_n0],
-                [b1, ..., b_n1]
-            ]
-        unlabeled_data: array of shape (n_unlabeled,)
-    """
-    with pm.Model() as model:
-        mu = pm.Normal("mu", mu=0, sigma=1, shape=2)
-        sigma = pm.HalfNormal("sigma", sigma=1, shape=2)
-
-        for i in range(2):
-            pm.Normal(
-                f"X_labeled{i}", mu=mu[i], sigma=sigma[i], observed=labeled_data[i]
-            )
-
-        weights = pm.Dirichlet("P_unlabeled(Y)", np.ones(2))
-
-        pm.NormalMixture(
-            "X_unlabeled", w=weights, mu=mu, sigma=sigma, observed=unlabeled_data
-        )
-
-    return model
-
-
-def main() -> None:
-    """The main method."""
-    mus = [0.0, 1.0]
-    sigmas = [0.3, 0.4]
-    ns = [500, 500]
-    ns_ = [200, 800]
-    K = 7
-    L = 2
-
-    partition = part.RealLinePartition(np.linspace(-0.5, 1.5, K - 1))
-    print(partition.breakpoints)
-
-    assert len(partition) == K
-
-    rng = np.random.default_rng(42)
-
-    X_stratified = [
-        rng.normal(loc=mu, scale=sigma, size=n) for mu, sigma, n in zip(mus, sigmas, ns)
-    ]
-    X = np.hstack(X_stratified)
-    Y = np.hstack([[i] * n for i, n in enumerate(ns)])
-
-    C = partition.predict(X)
-
-    X1_stratified = [
-        rng.normal(loc=mu, scale=sigma, size=n_)
-        for mu, sigma, n_ in zip(mus, sigmas, ns_)
-    ]
-    X1 = np.hstack(X1_stratified)
-    C1 = partition.predict(X1)
-
-    n_c_unlabeled = summ.count_values(K, C1)
-    n_y_c_labeled = summ.count_values_joint(L, K, Y, C)
-
-    print(n_c_unlabeled)
-    print(n_y_c_labeled)
-
-    fig, axs = plt.subplots(3, figsize=(6, 9))
-    plot_distributions(ax=axs[0], X=X, X1=X1, breakpoints=partition.breakpoints)
-
-    with gaussian_model(labeled_data=X_stratified, unlabeled_data=X1):
-        gaussian_data = pm.sample()
-
-    _, ax_trash = plt.subplots()
-
-    az.plot_posterior(gaussian_data, ax=[axs[1], ax_trash], var_names="P_unlabeled(Y)")
-    axs[1].set_title(r"$\pi'_1$ (Gaussian)")
-
-    with discrete.build_model(
-        n_y_and_c_labeled=n_y_c_labeled, n_c_unlabeled=n_c_unlabeled
-    ):
-        discrete_data = pm.sample()
-
-    az.plot_posterior(discrete_data, ax=[axs[2], ax_trash], var_names=discrete.P_TEST_Y)
-    axs[2].set_title(r"$\pi'_1$ (Discrete)")
-
-    for n, ax in enumerate(axs):
-        ax.text(
-            -0.1,
-            1.1,
-            string.ascii_uppercase[n],
-            transform=ax.transAxes,
-            size=20,
-            weight="bold",
-        )
-
-    fig.tight_layout()
-    fig.savefig("plot.pdf")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/run_categorical.py b/scripts/run_categorical.py
deleted file mode 100644
index 496f58c..0000000
--- a/scripts/run_categorical.py
+++ /dev/null
@@ -1,256 +0,0 @@
-"""Sample data directly from P(C|Y)
-distribution and run specified quantification estimator."""
-import argparse
-import enum
-from pathlib import Path
-from typing import List
-
-import pydantic
-
-import labelshift.interfaces.point_estimators as pe
-import labelshift.datasets.discrete_categorical as dc
-import labelshift.algorithms.api as algo
-import labelshift.experiments.api as exp
-
-
-class Algorithm(enum.Enum):
-    CLASSIFY_AND_COUNT = "CC"
-    RATIO_ESTIMATOR = "IR"
-    BBSE = "BBSE"
-    BAYESIAN = "MAP"
-
-
-def create_parser() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--n-labeled", type=int, default=1_000, help="Number of labeled examples."
-    )
-    parser.add_argument(
-        "--n-unlabeled", type=int, default=1_000, help="Number of unlabeled examples."
-    )
-    parser.add_argument(
-        "--quality",
-        type=float,
-        default=0.85,
-        help="Quality of the classifier on the labeled data.",
-    )
-    parser.add_argument(
-        "--quality-unlabeled",
-        type=float,
-        default=None,
-        help="Quality of the classifier on the unlabeled data."
-        "Can be used to assess model misspecification. "
-        "If None, the quality will be the same for both labeled"
-        "and unlabeled data set (no misspecification).",
-    )
-    parser.add_argument("--L", type=int, default=2, help="Number of classes L.")
-    parser.add_argument(
-        "--K",
-        type=int,
-        default=None,
-        help="Number of available predictions. Default: the same as L.",
-    )
-    parser.add_argument(
-        "--prevalence-labeled",
-        type=float,
-        default=None,
-        help="Prevalence of the first class in the labeled data set. Default: 1/L (uniform).",
-    )
-    parser.add_argument(
-        "--prevalence-unlabeled",
-        type=float,
-        default=None,
-        help="Prevalence of the first class in the unlabeled data set. Default: 1/L (uniform).",
-    )
-    parser.add_argument(
-        "--seed", type=int, default=1, help="Random seed to sample the data."
-    )
-    parser.add_argument("--algorithm", type=Algorithm, default=Algorithm.BAYESIAN)
-    parser.add_argument(
-        "--output", type=Path, default=Path(f"{exp.generate_name()}.json")
-    )
-    parser.add_argument("--output-dir", type=Path, default=None)
-
-    parser.add_argument(
-        "--bayesian-alpha",
-        type=float,
-        default=1.0,
-        help="Dirichlet prior specification for the Bayesian quantification.",
-    )
-    parser.add_argument(
-        "--restricted",
-        type=bool,
-        default=True,
-        help="Whether to use restricted invariant ratio estimator.",
-    )
-
-    parser.add_argument(
-        "--tag", type=str, default="", help="Can be used to tag the run."
-    )
-
-    parser.add_argument("--dry-run", action="store_true")
-
-    return parser
-
-
-class EstimatorArguments(pydantic.BaseModel):
-    bayesian_alpha: float
-    restricted: bool
-
-
-class Arguments(pydantic.BaseModel):
-    p_y_labeled: pydantic.confloat(gt=0, lt=1)
-    p_y_unlabeled: pydantic.confloat(gt=0, lt=1)
-
-    quality_labeled: pydantic.confloat(ge=0, le=1)
-    quality_unlabeled: pydantic.confloat(ge=0, le=1)
-
-    n_y: pydantic.PositiveInt = pydantic.Field(description="Number of labels, L.")
-    n_c: pydantic.PositiveInt = pydantic.Field(description="Number of predictions, K.")
-
-    n_labeled: pydantic.PositiveInt
-    n_unlabeled: pydantic.PositiveInt
-
-    seed: int
-
-    algorithm: Algorithm
-    tag: str
-    estimator_arguments: EstimatorArguments
-
-
-def parse_args(args) -> Arguments:
-    n_y = args.L
-    n_c = exp.calculate_value(overwrite=args.K, default=n_y)
-
-    quality_unlabeled = exp.calculate_value(
-        overwrite=args.quality_unlabeled, default=args.quality
-    )
-
-    p_y_labeled = exp.calculate_value(
-        overwrite=args.prevalence_labeled, default=1 / n_y
-    )
-    p_y_unlabeled = exp.calculate_value(
-        overwrite=args.prevalence_unlabeled, default=1 / n_y
-    )
-
-    return Arguments(
-        p_y_labeled=p_y_labeled,
-        p_y_unlabeled=p_y_unlabeled,
-        quality_labeled=args.quality,
-        quality_unlabeled=quality_unlabeled,
-        n_y=n_y,
-        n_c=n_c,
-        seed=args.seed,
-        n_labeled=args.n_labeled,
-        n_unlabeled=args.n_unlabeled,
-        algorithm=args.algorithm,
-        tag=args.tag,
-        estimator_arguments=EstimatorArguments(
-            bayesian_alpha=args.bayesian_alpha,
-            restricted=args.restricted,
-        ),
-    )
-
-
-def create_sampler(args: Arguments) -> dc.DiscreteSampler:
-    L = args.n_y
-    p_y_labeled = dc.almost_eye(L, L, diagonal=args.p_y_labeled)[0, :]
-    p_y_unlabeled = dc.almost_eye(L, L, diagonal=args.p_y_unlabeled)[0, :]
-
-    p_c_cond_y_labeled = dc.almost_eye(
-        y=L,
-        c=args.n_c,
-        diagonal=args.quality_labeled,
-    )
-    p_c_cond_y_unlabeled = dc.almost_eye(
-        y=L,
-        c=args.n_c,
-        diagonal=args.quality_unlabeled,
-    )
-
-    return dc.discrete_sampler_factory(
-        p_y_labeled=p_y_labeled,
-        p_y_unlabeled=p_y_unlabeled,
-        p_c_cond_y_labeled=p_c_cond_y_labeled,
-        p_c_cond_y_unlabeled=p_c_cond_y_unlabeled,
-    )
-
-
-def get_estimator(args: Arguments) -> pe.SummaryStatisticPrevalenceEstimator:
-    if args.algorithm == Algorithm.CLASSIFY_AND_COUNT:
-        if args.n_c != args.n_y:
-            raise ValueError("For classify and count you need K = L.")
-        return algo.ClassifyAndCount()
-    elif args.algorithm == Algorithm.RATIO_ESTIMATOR:
-        return algo.InvariantRatioEstimator(
-            restricted=args.estimator_arguments.restricted, enforce_square=False
-        )
-    elif args.algorithm == Algorithm.BBSE:
-        return algo.BlackBoxShiftEstimator(enforce_square=False)
-    elif args.algorithm == Algorithm.BAYESIAN:
-        return algo.DiscreteCategoricalMAPEstimator(
-            alpha_unlabeled=args.estimator_arguments.bayesian_alpha
-        )
-    else:
-        raise ValueError(f"Algorithm {args.algorithm} not recognized.")
-
-
-class Result(pydantic.BaseModel):
-    p_y_unlabeled_true: List[float]
-    p_y_unlabeled_estimate: List[float]
-    time: float
-    algorithm: Algorithm
-
-    input_arguments: Arguments
-
-
-def dry_run(args: Arguments) -> None:
-    print("-- Dry run --\nUsed settings:")
-    print(args)
-    print("Exiting...")
-
-
-def main() -> None:
-    """The main function of the experiment."""
-    raw_args = create_parser().parse_args()
-    args: Arguments = parse_args(raw_args)
-
-    if raw_args.dry_run:
-        dry_run(args)
-        return
-
-    sampler = create_sampler(args)
-
-    summary_statistic = sampler.sample_summary_statistic(
-        n_labeled=args.n_labeled,
-        n_unlabeled=args.n_unlabeled,
-        seed=args.seed,
-    )
-
-    estimator = get_estimator(args)
-    timer = exp.Timer()
-    estimate = estimator.estimate_from_summary_statistic(summary_statistic)
-    elapsed_time = timer.check()
-
-    result = Result(
-        algorithm=args.algorithm,
-        time=elapsed_time,
-        p_y_unlabeled_true=sampler.unlabeled.p_y.tolist(),
-        p_y_unlabeled_estimate=estimate.tolist(),
-        input_arguments=args,
-    )
-
-    if raw_args.output_dir is not None:
-        raw_args.output_dir.mkdir(exist_ok=True, parents=True)
-        output_path = raw_args.output_dir / raw_args.output
-    else:
-        output_path = raw_args.output
-
-    with open(output_path, "w") as f:
-        f.write(result.json())
-
-    print(result)
-    print("Finished.")
-
-
-if __name__ == "__main__":
-    main()