From 2af117b9470f8615b910648327caa08d63f2549f Mon Sep 17 00:00:00 2001
From: DvGils <demian_vg@hotmail.nl>
Date: Mon, 6 May 2024 09:20:20 +0200
Subject: [PATCH 1/5] chore: improve docstrings

---
 weightfactors/raking/generalized_raker.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/weightfactors/raking/generalized_raker.py b/weightfactors/raking/generalized_raker.py
index cc0bf66..46f30af 100644
--- a/weightfactors/raking/generalized_raker.py
+++ b/weightfactors/raking/generalized_raker.py
@@ -22,7 +22,7 @@ class GeneralizedRaker:
             Whether to raise an error when the weight factors are extreme
                 according to `cutoffs`, else we raise a warning. Default is False.
         cutoffs: Dict[str, float], optional
-            What we consider extreme weight factors. 'lo' is the lower bound (defaults to 0.25)
+            When weights are considered to be extreme. 'lo' is the lower bound (defaults to 0.25)
                 and 'hi' is the upper bound (defaults to 4). If `raise_on_extreme` we raise an
                     error if any weight exceeds the cutoffs, otherwise we clip the extremes to the cutoffs
         exclusion_column: str, optional
@@ -193,11 +193,11 @@ def rake(
             data: pd.DataFrame
                 The survey dataset
             max_steps: int
-                Maximum number of iterations
+                The maximum number of iterations to try and reach convergence
             tolerance: float
-                Maximum tolerance for loss, we claim success if the loss is lower than this
+                Maximum tolerance for loss; convergence is reached if the loss is smaller than this value
             early_stopping: int
-                Maximum number of iterations without improvement in loss before we call quits
+                Maximum number of iterations without improvement in loss before stopping early
 
         Raises:
             WeightsConvergenceError if the algorithm did not converge before `max_steps`

From 00bce214230737a37240aaefa283639c577c02a4 Mon Sep 17 00:00:00 2001
From: DvGils <demian_vg@hotmail.nl>
Date: Mon, 6 May 2024 09:31:03 +0200
Subject: [PATCH 2/5] chore: add input validation for observations that have
 not been mapped to a population target

---
 tests/test_generalized_raking.py          | 4 ++++
 weightfactors/raking/generalized_raker.py | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/test_generalized_raking.py b/tests/test_generalized_raking.py
index aae3389..ba3c5c9 100644
--- a/tests/test_generalized_raking.py
+++ b/tests/test_generalized_raking.py
@@ -38,6 +38,10 @@ def test_invalid_input():
         data = pd.DataFrame({"Gender": ["Male", "Male", "Female"]})
         raker = GeneralizedRaker({"Gender": {"Male": 0.51, "Female": 0.5}})
         raker.rake(data)
+    with pytest.raises(KeyError, match="There are observations for a value in 'Gender' that has not been mapped to a population target"):
+        data = pd.DataFrame({"Gender": ["Male", "Male", "Female", "Other"]})
+        raker = GeneralizedRaker({"Gender": {"Male": 0.51, "Female": 0.49}})
+        raker.rake(data)
 
 
 def test_generalized_raking_no_convergence():
diff --git a/weightfactors/raking/generalized_raker.py b/weightfactors/raking/generalized_raker.py
index 46f30af..267ed50 100644
--- a/weightfactors/raking/generalized_raker.py
+++ b/weightfactors/raking/generalized_raker.py
@@ -118,10 +118,15 @@ def validate_input(self, data: pd.DataFrame) -> None:
             # Make sure all keys are present in the dataset
             if key not in data.columns:
                 raise KeyError(f"There is no column {key} in the provided dataset")
-            # Make sure there are no missing values in the questions used for calculating weights
+            # Make sure there are no missing values in the columns used for calculating weights
             if data[key].isna().any(axis=None):
                 raise ValueError(f"Column {key} contains missing values")
+            # Make sure all unique values in the target columns have been mapped
+            # It is impossible to set values with observations to a weight of 0
+            if len(data[key].unique()) != len(value):
+                raise KeyError(f"There are observations for a value in '{key}' that has not been mapped to a population target")
             # Make sure we have at least 1 observation for each category
+            # It is impossible to set values without observations to a weight larger than 1
             for k, _ in value.items():
                 if k not in data[key].unique():
                     raise KeyError(f"There are no observations for {k} in column {key}")

From b7a06b02f3112d35e63cf26e254f5ab945cbae7f Mon Sep 17 00:00:00 2001
From: DvGils <demian_vg@hotmail.nl>
Date: Mon, 6 May 2024 09:31:13 +0200
Subject: [PATCH 3/5] chore: update `.gitignore`

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 6769e21..ef951ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
+weightfactors-env/
 
 # C extensions
 *.so

From 36fd49649fbdb4c82a0dec07bfe2cc2bb11767ca Mon Sep 17 00:00:00 2001
From: DvGils <demian_vg@hotmail.nl>
Date: Mon, 6 May 2024 09:36:26 +0200
Subject: [PATCH 4/5] chore: update `README`

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 03e443e..cd8fb86 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 ![Continuous Integration](https://github.com/markteffect/weightfactors/actions/workflows/ci.yml/badge.svg)
-![Python](https://img.shields.io/badge/Python-3.9%20|%203.10-blue)
+![Python](https://img.shields.io/badge/Python-3.9+-blue)
 # **Weight Factors**
 Calculate weight factors for survey data to approximate a representative sample
 

From 1418971d44a294602e39922a75e11e6501819ff2 Mon Sep 17 00:00:00 2001
From: DvGils <demian_vg@hotmail.nl>
Date: Mon, 6 May 2024 09:38:06 +0200
Subject: [PATCH 5/5] fix[black]: code formatting

---
 tests/test_generalized_raking.py          | 5 ++++-
 weightfactors/raking/generalized_raker.py | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/test_generalized_raking.py b/tests/test_generalized_raking.py
index ba3c5c9..ec93fc7 100644
--- a/tests/test_generalized_raking.py
+++ b/tests/test_generalized_raking.py
@@ -38,7 +38,10 @@ def test_invalid_input():
         data = pd.DataFrame({"Gender": ["Male", "Male", "Female"]})
         raker = GeneralizedRaker({"Gender": {"Male": 0.51, "Female": 0.5}})
         raker.rake(data)
-    with pytest.raises(KeyError, match="There are observations for a value in 'Gender' that has not been mapped to a population target"):
+    with pytest.raises(
+        KeyError,
+        match="There are observations for a value in 'Gender' that has not been mapped to a population target",
+    ):
         data = pd.DataFrame({"Gender": ["Male", "Male", "Female", "Other"]})
         raker = GeneralizedRaker({"Gender": {"Male": 0.51, "Female": 0.49}})
         raker.rake(data)
diff --git a/weightfactors/raking/generalized_raker.py b/weightfactors/raking/generalized_raker.py
index 267ed50..21f0736 100644
--- a/weightfactors/raking/generalized_raker.py
+++ b/weightfactors/raking/generalized_raker.py
@@ -124,7 +124,9 @@ def validate_input(self, data: pd.DataFrame) -> None:
             # Make sure all unique values in the target columns have been mapped
             # It is impossible to set values with observations to a weight of 0
             if len(data[key].unique()) != len(value):
-                raise KeyError(f"There are observations for a value in '{key}' that has not been mapped to a population target")
+                raise KeyError(
+                    f"There are observations for a value in '{key}' that has not been mapped to a population target"
+                )
             # Make sure we have at least 1 observation for each category
             # It is impossible to set values without observations to a weight larger than 1
             for k, _ in value.items():