From 2f528dc758cbdfd807ddc51b58da2c5d89c25e18 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 5 May 2024 12:23:06 +0100
Subject: [PATCH 1/4] test: refactor into separate HelpMessages class

---
 pysr/test/test.py | 195 ++++++++++++++++++++++++----------------------
 1 file changed, 100 insertions(+), 95 deletions(-)

diff --git a/pysr/test/test.py b/pysr/test/test.py
index 404d5a139..d2a169e16 100644
--- a/pysr/test/test.py
+++ b/pysr/test/test.py
@@ -563,6 +563,105 @@ def test_csv_to_pkl_conversion(self):
         test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
         self.assertEqual(test_pkl_file, str(expected_pkl_file))
 
+    def test_pickle_with_temp_equation_file(self):
+        """If we have a temporary equation file, unpickle the estimator."""
+        model = PySRRegressor(
+            populations=int(1 + DEFAULT_POPULATIONS / 5),
+            temp_equation_file=True,
+            procs=0,
+            multithreading=False,
+        )
+        nout = 3
+        X = np.random.randn(100, 2)
+        y = np.random.randn(100, nout)
+        model.fit(X, y)
+        contents = model.equation_file_contents_.copy()
+
+        y_predictions = model.predict(X)
+
+        equation_file_base = model.equation_file_
+        for i in range(1, nout + 1):
+            assert not os.path.exists(str(equation_file_base) + f".out{i}.bkup")
+
+        with tempfile.NamedTemporaryFile() as pickle_file:
+            pkl.dump(model, pickle_file)
+            pickle_file.seek(0)
+            model2 = pkl.load(pickle_file)
+
+        contents2 = model2.equation_file_contents_
+        cols_to_check = ["equation", "loss", "complexity"]
+        for frame1, frame2 in zip(contents, contents2):
+            pd.testing.assert_frame_equal(frame1[cols_to_check], frame2[cols_to_check])
+
+        y_predictions2 = model2.predict(X)
+        np.testing.assert_array_equal(y_predictions, y_predictions2)
+
+    def test_scikit_learn_compatibility(self):
+        """Test PySRRegressor compatibility with scikit-learn."""
+        model = PySRRegressor(
+            niterations=int(1 + DEFAULT_NITERATIONS / 10),
+            populations=int(1 + DEFAULT_POPULATIONS / 3),
+            ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
+            verbosity=0,
+            progress=False,
+            random_state=0,
+            deterministic=True,  # Deterministic as tests require this.
+            procs=0,
+            multithreading=False,
+            warm_start=False,
+            temp_equation_file=True,
+        )  # Return early.
+
+        check_generator = check_estimator(model, generate_only=True)
+        exception_messages = []
+        for _, check in check_generator:
+            if check.func.__name__ == "check_complex_data":
+                # We can use complex data, so avoid this check.
+                continue
+            try:
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    check(model)
+                print("Passed", check.func.__name__)
+            except Exception:
+                error_message = str(traceback.format_exc())
+                exception_messages.append(
+                    f"{check.func.__name__}:\n" + error_message + "\n"
+                )
+                print("Failed", check.func.__name__, "with:")
+                # Add a leading tab to error message, which
+                # might be multi-line:
+                print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
+        # If any checks failed don't let the test pass.
+        self.assertEqual(len(exception_messages), 0)
+
+    def test_param_groupings(self):
+        """Test that param_groupings are complete"""
+        param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
+        if not param_groupings_file.exists():
+            return
+
+        # Read the file, discarding lines ending in ":",
+        # and removing leading "\s*-\s*":
+        params = []
+        with open(param_groupings_file, "r") as f:
+            for line in f.readlines():
+                if line.strip().endswith(":"):
+                    continue
+                if line.strip().startswith("-"):
+                    params.append(line.strip()[1:].strip())
+
+        regressor_params = [
+            p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
+        ]
+
+        # Check the sets are equal:
+        self.assertSetEqual(set(params), set(regressor_params))
+
+
+class TestHelpMessages(unittest.TestCase):
+    """Test user help messages."""
+
     def test_deprecation(self):
         """Ensure that deprecation works as expected.
 
@@ -705,101 +804,6 @@ def test_bad_kwargs(self):
                 model.get_best()
                 print("Failed", opt["kwargs"])
 
-    def test_pickle_with_temp_equation_file(self):
-        """If we have a temporary equation file, unpickle the estimator."""
-        model = PySRRegressor(
-            populations=int(1 + DEFAULT_POPULATIONS / 5),
-            temp_equation_file=True,
-            procs=0,
-            multithreading=False,
-        )
-        nout = 3
-        X = np.random.randn(100, 2)
-        y = np.random.randn(100, nout)
-        model.fit(X, y)
-        contents = model.equation_file_contents_.copy()
-
-        y_predictions = model.predict(X)
-
-        equation_file_base = model.equation_file_
-        for i in range(1, nout + 1):
-            assert not os.path.exists(str(equation_file_base) + f".out{i}.bkup")
-
-        with tempfile.NamedTemporaryFile() as pickle_file:
-            pkl.dump(model, pickle_file)
-            pickle_file.seek(0)
-            model2 = pkl.load(pickle_file)
-
-        contents2 = model2.equation_file_contents_
-        cols_to_check = ["equation", "loss", "complexity"]
-        for frame1, frame2 in zip(contents, contents2):
-            pd.testing.assert_frame_equal(frame1[cols_to_check], frame2[cols_to_check])
-
-        y_predictions2 = model2.predict(X)
-        np.testing.assert_array_equal(y_predictions, y_predictions2)
-
-    def test_scikit_learn_compatibility(self):
-        """Test PySRRegressor compatibility with scikit-learn."""
-        model = PySRRegressor(
-            niterations=int(1 + DEFAULT_NITERATIONS / 10),
-            populations=int(1 + DEFAULT_POPULATIONS / 3),
-            ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
-            verbosity=0,
-            progress=False,
-            random_state=0,
-            deterministic=True,  # Deterministic as tests require this.
-            procs=0,
-            multithreading=False,
-            warm_start=False,
-            temp_equation_file=True,
-        )  # Return early.
-
-        check_generator = check_estimator(model, generate_only=True)
-        exception_messages = []
-        for _, check in check_generator:
-            if check.func.__name__ == "check_complex_data":
-                # We can use complex data, so avoid this check.
-                continue
-            try:
-                with warnings.catch_warnings():
-                    warnings.simplefilter("ignore")
-                    check(model)
-                print("Passed", check.func.__name__)
-            except Exception:
-                error_message = str(traceback.format_exc())
-                exception_messages.append(
-                    f"{check.func.__name__}:\n" + error_message + "\n"
-                )
-                print("Failed", check.func.__name__, "with:")
-                # Add a leading tab to error message, which
-                # might be multi-line:
-                print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
-        # If any checks failed don't let the test pass.
-        self.assertEqual(len(exception_messages), 0)
-
-    def test_param_groupings(self):
-        """Test that param_groupings are complete"""
-        param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
-        if not param_groupings_file.exists():
-            return
-
-        # Read the file, discarding lines ending in ":",
-        # and removing leading "\s*-\s*":
-        params = []
-        with open(param_groupings_file, "r") as f:
-            for line in f.readlines():
-                if line.strip().endswith(":"):
-                    continue
-                if line.strip().startswith("-"):
-                    params.append(line.strip()[1:].strip())
-
-        regressor_params = [
-            p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
-        ]
-
-        # Check the sets are equal:
-        self.assertSetEqual(set(params), set(regressor_params))
-
 
 TRUE_PREAMBLE = "\n".join(
     [
@@ -1148,6 +1152,7 @@ def runtests(just_tests=False):
         TestBest,
         TestFeatureSelection,
         TestMiscellaneous,
+        TestHelpMessages,
         TestLaTeXTable,
         TestDimensionalConstraints,
     ]

From ea7ced402bd5db4e675175e4527cca804bb9f170 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 5 May 2024 12:23:42 +0100
Subject: [PATCH 2/4] feat: automatically suggest related keywords

---
 pysr/sr.py        | 19 ++++++++++++++++---
 pysr/test/test.py | 20 ++++++++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 4b8d5b30e..307098806 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1,6 +1,8 @@
 """Define the PySRRegressor scikit-learn interface."""
 
 import copy
+import difflib
+import inspect
 import os
 import pickle as pkl
 import re
@@ -907,9 +909,11 @@ def __init__(
                         FutureWarning,
                     )
                 else:
-                    raise TypeError(
-                        f"{k} is not a valid keyword argument for PySRRegressor."
-                    )
+                    suggested_keywords = self._suggest_keywords(k)
+                    err_msg = f"{k} is not a valid keyword argument for PySRRegressor."
+                    if len(suggested_keywords) > 0:
+                        err_msg += f" Did you mean {' or '.join(suggested_keywords)}?"
+                    raise TypeError(err_msg)
 
     @classmethod
     def from_file(
@@ -1991,6 +1995,15 @@ def fit(
 
         return self
 
+    def _suggest_keywords(self, k: str) -> List[str]:
+        valid_keywords = [
+            param
+            for param in inspect.signature(self.__init__).parameters
+            if param not in ["self", "kwargs"]
+        ]
+        suggestions = difflib.get_close_matches(k, valid_keywords, n=3)
+        return suggestions
+
     def refresh(self, checkpoint_file=None) -> None:
         """
         Update self.equations_ with any new options passed.
diff --git a/pysr/test/test.py b/pysr/test/test.py
index d2a169e16..2d27f78b6 100644
--- a/pysr/test/test.py
+++ b/pysr/test/test.py
@@ -804,6 +804,26 @@ def test_bad_kwargs(self):
                 model.get_best()
                 print("Failed", opt["kwargs"])
 
+    def test_suggest_keywords(self):
+        model = PySRRegressor()
+        # Easy
+        self.assertEqual(model._suggest_keywords("loss_function"), ["loss_function"])
+
+        # More complex, and with error
+        with self.assertRaises(TypeError) as cm:
+            model = PySRRegressor(ncyclesperiterationn=5)
+
+        self.assertIn("ncyclesperiterationn is not a valid keyword", str(cm.exception))
+        self.assertIn("Did you mean", str(cm.exception))
+        self.assertIn("ncycles_per_iteration or", str(cm.exception))
+        self.assertIn("niteration", str(cm.exception))
+
+        # Farther matches (this might need to be changed)
+        with self.assertRaises(TypeError) as cm:
+            model = PySRRegressor(operators=["+", "-"])
+
+        self.assertIn("unary_operators or binary_operators", str(cm.exception))
+
 
 TRUE_PREAMBLE = "\n".join(
     [

From 62481837578a2b118c6dd8db9031d6c36eb521bd Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 5 May 2024 12:29:50 +0100
Subject: [PATCH 3/4] refactor: clean up use of `__init__` in keyword
 suggestion

---
 pysr/sr.py        | 21 +++++++++++----------
 pysr/test/test.py | 12 +++++++++---
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 307098806..0cfce42b5 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -909,7 +909,7 @@ def __init__(
                         FutureWarning,
                     )
                 else:
-                    suggested_keywords = self._suggest_keywords(k)
+                    suggested_keywords = _suggest_keywords(PySRRegressor, k)
                     err_msg = f"{k} is not a valid keyword argument for PySRRegressor."
                     if len(suggested_keywords) > 0:
                         err_msg += f" Did you mean {' or '.join(suggested_keywords)}?"
@@ -1995,15 +1995,6 @@ def fit(
 
         return self
 
-    def _suggest_keywords(self, k: str) -> List[str]:
-        valid_keywords = [
-            param
-            for param in inspect.signature(self.__init__).parameters
-            if param not in ["self", "kwargs"]
-        ]
-        suggestions = difflib.get_close_matches(k, valid_keywords, n=3)
-        return suggestions
-
     def refresh(self, checkpoint_file=None) -> None:
         """
         Update self.equations_ with any new options passed.
@@ -2455,6 +2446,16 @@ def latex_table(
         return with_preamble(table_string)
 
 
+def _suggest_keywords(cls, k: str) -> List[str]:
+    valid_keywords = [
+        param
+        for param in inspect.signature(cls.__init__).parameters
+        if param not in ["self", "kwargs"]
+    ]
+    suggestions = difflib.get_close_matches(k, valid_keywords, n=3)
+    return suggestions
+
+
 def idx_model_selection(equations: pd.DataFrame, model_selection: str):
     """Select an expression and return its index."""
     if model_selection == "accuracy":
diff --git a/pysr/test/test.py b/pysr/test/test.py
index 2d27f78b6..4a5c23a61 100644
--- a/pysr/test/test.py
+++ b/pysr/test/test.py
@@ -15,7 +15,12 @@
 from ..export_latex import sympy2latex
 from ..feature_selection import _handle_feature_selection, run_feature_selection
 from ..julia_helpers import init_julia
-from ..sr import _check_assertions, _process_constraints, idx_model_selection
+from ..sr import (
+    _check_assertions,
+    _process_constraints,
+    _suggest_keywords,
+    idx_model_selection,
+)
 from ..utils import _csv_filename_to_pkl_filename
 from .params import (
     DEFAULT_NCYCLES,
@@ -805,9 +810,10 @@ def test_bad_kwargs(self):
                 print("Failed", opt["kwargs"])
 
     def test_suggest_keywords(self):
-        model = PySRRegressor()
         # Easy
-        self.assertEqual(model._suggest_keywords("loss_function"), ["loss_function"])
+        self.assertEqual(
+            _suggest_keywords(PySRRegressor, "loss_function"), ["loss_function"]
+        )
 
         # More complex, and with error
         with self.assertRaises(TypeError) as cm:

From 6088859cdd32666862f95bdde2091d5ddf749691 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 5 May 2024 12:38:57 +0100
Subject: [PATCH 4/4] docs: better formatting for errors

---
 pysr/sr.py        | 12 +++++++-----
 pysr/test/test.py | 10 ++++++----
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 0cfce42b5..36c33cf38 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -884,15 +884,15 @@ def __init__(
                     updated_kwarg_name = DEPRECATED_KWARGS[k]
                     setattr(self, updated_kwarg_name, v)
                     warnings.warn(
-                        f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. "
+                        f"`{k}` has been renamed to `{updated_kwarg_name}` in PySRRegressor. "
                         "Please use that instead.",
                         FutureWarning,
                     )
                 # Handle kwargs that have been moved to the fit method
                 elif k in ["weights", "variable_names", "Xresampled"]:
                     warnings.warn(
-                        f"{k} is a data dependant parameter so should be passed when fit is called. "
-                        f"Ignoring parameter; please pass {k} during the call to fit instead.",
+                        f"`{k}` is a data-dependent parameter and should be passed when fit is called. "
+                        f"Ignoring parameter; please pass `{k}` during the call to fit instead.",
                         FutureWarning,
                     )
                 elif k == "julia_project":
@@ -910,9 +910,11 @@ def __init__(
                     )
                 else:
                     suggested_keywords = _suggest_keywords(PySRRegressor, k)
-                    err_msg = f"{k} is not a valid keyword argument for PySRRegressor."
+                    err_msg = (
+                        f"`{k}` is not a valid keyword argument for PySRRegressor."
+                    )
                     if len(suggested_keywords) > 0:
-                        err_msg += f" Did you mean {' or '.join(suggested_keywords)}?"
+                        err_msg += f" Did you mean {', '.join(map(lambda s: f'`{s}`', suggested_keywords))}?"
                     raise TypeError(err_msg)
 
     @classmethod
diff --git a/pysr/test/test.py b/pysr/test/test.py
index 4a5c23a61..14393fce0 100644
--- a/pysr/test/test.py
+++ b/pysr/test/test.py
@@ -819,16 +819,18 @@ def test_suggest_keywords(self):
         with self.assertRaises(TypeError) as cm:
             model = PySRRegressor(ncyclesperiterationn=5)
 
-        self.assertIn("ncyclesperiterationn is not a valid keyword", str(cm.exception))
+        self.assertIn(
+            "`ncyclesperiterationn` is not a valid keyword", str(cm.exception)
+        )
         self.assertIn("Did you mean", str(cm.exception))
-        self.assertIn("ncycles_per_iteration or", str(cm.exception))
-        self.assertIn("niteration", str(cm.exception))
+        self.assertIn("`ncycles_per_iteration`, ", str(cm.exception))
+        self.assertIn("`niterations`", str(cm.exception))
 
         # Farther matches (this might need to be changed)
         with self.assertRaises(TypeError) as cm:
             model = PySRRegressor(operators=["+", "-"])
 
-        self.assertIn("unary_operators or binary_operators", str(cm.exception))
+        self.assertIn("`unary_operators`, `binary_operators`", str(cm.exception))
 
 
 TRUE_PREAMBLE = "\n".join(