improvements to documentation

interpretml · Nov 19, 2024 · 5fbea8c · 5fbea8c
1 parent 2975384
commit 5fbea8c
Show file tree

Hide file tree

Showing 4 changed files with 35 additions and 11 deletions.
diff --git a/docs/interpret/_toc.yml b/docs/interpret/_toc.yml
@@ -41,6 +41,7 @@ chapters:
     - file: python/api/link_func
     - file: python/api/inv_link
     - file: python/api/measure_interactions
+    - file: python/api/purify
   - file: python/api/interpret-develop
     sections:
     - file: python/api/debug_mode

diff --git a/docs/interpret/python/api/purify.ipynb b/docs/interpret/python/api/purify.ipynb
@@ -0,0 +1,23 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "api785f7",
+   "metadata": {},
+   "source": [
+    "# purify\n",
+    "\n",
+    "```{eval-rst}\n",
+    ".. autofunction:: interpret.utils.purify\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/python/interpret-core/interpret/glassbox/_ebm/_ebm.py b/python/interpret-core/interpret/glassbox/_ebm/_ebm.py
@@ -979,9 +979,8 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
         results = provider.parallel(boost, parallel_args)
 
         # let python reclaim the dataset memory via reference counting
-        del (
-            parallel_args
-        )  # parallel_args holds references to dataset, so must be deleted
+        # parallel_args holds references to dataset, so must be deleted
+        del parallel_args
         del dataset
 
         best_iteration = [[]]
@@ -1063,9 +1062,8 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
                 feature_names_in,
                 feature_types_in,
             )
-            del (
-                y
-            )  # we no longer need this, so allow the garbage collector to reclaim it
+            # we no longer need this, so allow the garbage collector to reclaim it
+            del y
 
             if isinstance(interactions, int):
                 _log.info("Estimating with FAST")
@@ -2477,9 +2475,10 @@ class ExplainableBoostingClassifier(EBMModel, ClassifierMixin, ExplainerMixin):
             - Integer (1 <= interactions): Count of interactions to be automatically selected
             - Percentage (interactions < 1.0): Determine the integer count of interactions by multiplying the number of features by this percentage
             - List of tuples: The tuples contain the indices of the features within each additive term. In addition to pairs,
-              the interactions parameter accepts higher order interactions. It also accepts single feature terms which will cause
+              the interactions parameter accepts higher order interactions. It also accepts univariate terms which will cause
               the algorithm to boost the main terms at the same time as the interactions. When boosting mains at the same time
-              as interactions, the exclude parameter should usually be set to 'mains'.
+              as interactions, the exclude parameter should be set to 'mains' and currently max_bins needs to be equal to
+              max_interaction_bins.
     exclude : 'mains' or list of tuples of feature indices|names, default=None
         Features or terms to be excluded.
     validation_size : int or float, default=0.15
@@ -2826,9 +2825,10 @@ class ExplainableBoostingRegressor(EBMModel, RegressorMixin, ExplainerMixin):
             - Integer (1 <= interactions): Count of interactions to be automatically selected
             - Percentage (interactions < 1.0): Determine the integer count of interactions by multiplying the number of features by this percentage
             - List of tuples: The tuples contain the indices of the features within each additive term. In addition to pairs,
-              the interactions parameter accepts higher order interactions. It also accepts single feature terms which will cause
+              the interactions parameter accepts higher order interactions. It also accepts univariate terms which will cause
               the algorithm to boost the main terms at the same time as the interactions. When boosting mains at the same time
-              as interactions, the exclude parameter should usually be set to 'mains'.
+              as interactions, the exclude parameter should be set to 'mains' and currently max_bins needs to be equal to
+              max_interaction_bins.
     exclude : 'mains' or list of tuples of feature indices|names, default=None
         Features or terms to be excluded.
     validation_size : int or float, default=0.15

diff --git a/python/interpret-core/interpret/utils/_purify.py b/python/interpret-core/interpret/utils/_purify.py
@@ -16,7 +16,7 @@ def _measure_impurity(scores, weights):
 
 
 def purify(scores, weights, tolerance=0.0, is_randomized=True):
-    """Purifies a score tensor into it's pure component and a series of impure components. For pairs, the
+    """Purifies a multi-dimensional tensor into its pure component and a series of impure components. For pairs, the
         result will be a pair where the weighted sum along any row or column is zero, and the two main effects
         which are the impurities from the pair. The main effects will be further purified into zero-centered graphs
         and an intercept. This function also handles multiclass, which is detected when the scores tensor has one