
Commit

Merge pull request #2 from markteffect/dev-branch
Dev branch
ME-researchgroup committed May 6, 2024
2 parents 7f0a15a + 1418971 commit 32e9376
Showing 4 changed files with 21 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -2,6 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
weightfactors-env/

# C extensions
*.so
2 changes: 1 addition & 1 deletion README.md
@@ -1,5 +1,5 @@
![Continuous Integration](https://github.com/markteffect/weightfactors/actions/workflows/ci.yml/badge.svg)
![Python](https://img.shields.io/badge/Python-3.9%20|%203.10-blue)
![Python](https://img.shields.io/badge/Python-3.9+-blue)
# **Weight Factors**
Calculate weight factors for survey data to approximate a representative sample

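For orientation, here is a minimal usage sketch of the API exercised by the tests changed in this commit. The import path and the return value of `rake` are assumptions, and the column name and target shares are illustrative only.

import pandas as pd

from weightfactors import GeneralizedRaker  # assumed import path

# Observed sample with a gender skew that the weights should correct
data = pd.DataFrame({"Gender": ["Male", "Male", "Male", "Female", "Female"]})

# Population targets: every observed value must be mapped to a target share
raker = GeneralizedRaker({"Gender": {"Male": 0.49, "Female": 0.51}})

weights = raker.rake(data)  # assumed to return one weight factor per row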
7 changes: 7 additions & 0 deletions tests/test_generalized_raking.py
@@ -38,6 +38,13 @@ def test_invalid_input():
data = pd.DataFrame({"Gender": ["Male", "Male", "Female"]})
raker = GeneralizedRaker({"Gender": {"Male": 0.51, "Female": 0.5}})
raker.rake(data)
with pytest.raises(
    KeyError,
    match="There are observations for a value in 'Gender' that has not been mapped to a population target",
):
    data = pd.DataFrame({"Gender": ["Male", "Male", "Female", "Other"]})
    raker = GeneralizedRaker({"Gender": {"Male": 0.51, "Female": 0.49}})
    raker.rake(data)


def test_generalized_raking_no_convergence():
17 changes: 12 additions & 5 deletions weightfactors/raking/generalized_raker.py
@@ -22,7 +22,7 @@ class GeneralizedRaker:
    Whether to raise an error when the weight factors are extreme
    according to `cutoffs`, else we raise a warning. Default is False.
cutoffs: Dict[str, float], optional
    What we consider extreme weight factors. 'lo' is the lower bound (defaults to 0.25)
    When weights are considered to be extreme. 'lo' is the lower bound (defaults to 0.25)
    and 'hi' is the upper bound (defaults to 4). If `raise_on_extreme` we raise an
    error if any weight exceeds the cutoffs, otherwise we clip the extremes to the cutoffs
exclusion_column: str, optional
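The docstring above documents `raise_on_extreme` and `cutoffs`; a hypothetical construction, assuming both are keyword arguments of the constructor (only the parameter names, their defaults, and the clip-versus-raise behaviour come from the docstring):

from weightfactors import GeneralizedRaker  # assumed import path

raker = GeneralizedRaker(
    {"Gender": {"Male": 0.49, "Female": 0.51}},
    raise_on_extreme=False,           # clip extreme weights to the cutoffs instead of raising
    cutoffs={"lo": 0.25, "hi": 4.0},  # the documented default bounds
)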
@@ -118,10 +118,17 @@ def validate_input(self, data: pd.DataFrame) -> None:
# Make sure all keys are present in the dataset
if key not in data.columns:
    raise KeyError(f"There is no column {key} in the provided dataset")
# Make sure there are no missing values in the questions used for calculating weights
# Make sure there are no missing values in the columns used for calculating weights
if data[key].isna().any(axis=None):
    raise ValueError(f"Column {key} contains missing values")
# Make sure all unique values in the target columns have been mapped
# It is impossible to set values with observations to a weight of 0
if len(data[key].unique()) != len(value):
    raise KeyError(
        f"There are observations for a value in '{key}' that has not been mapped to a population target"
    )
# Make sure we have at least 1 observation for each category
# It is impossible to set values without observations to a weight larger than 1
for k, _ in value.items():
    if k not in data[key].unique():
        raise KeyError(f"There are no observations for {k} in column {key}")
@@ -193,11 +200,11 @@ def rake(
data: pd.DataFrame
    The survey dataset
max_steps: int
    Maximum number of iterations
    The maximum number of iterations to try and reach convergence
tolerance: float
    Maximum tolerance for loss, we claim success if the loss is lower than this
    Maximum tolerance for loss, convergence is reached if the loss is smaller than this value
early_stopping: int
    Maximum number of iterations without improvement in loss before we call quits
    Maximum number of iterations without improvement in loss
Raises:
    WeightsConvergenceError if the algorithm did not converge before `max_steps`
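For reference, an illustrative call using the parameters documented above. The actual default values are not visible in this diff, so the numbers below are placeholders, and the import path is assumed.

import pandas as pd

from weightfactors import GeneralizedRaker  # assumed import path

data = pd.DataFrame({"Gender": ["Male", "Male", "Female", "Female"]})
raker = GeneralizedRaker({"Gender": {"Male": 0.49, "Female": 0.51}})

weights = raker.rake(
    data,
    max_steps=1000,     # give up after this many iterations
    tolerance=1e-6,     # converged once the loss drops below this value
    early_stopping=50,  # stop if the loss has not improved for this many steps
)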
