diff --git a/.github/workflows/publish-python-package.yml b/.github/workflows/publish-python-package.yml index 75b9a41e..4ed9e1bf 100644 --- a/.github/workflows/publish-python-package.yml +++ b/.github/workflows/publish-python-package.yml @@ -20,7 +20,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/test-python-package.yml b/.github/workflows/test-python-package.yml index a4db1863..3c88e721 100644 --- a/.github/workflows/test-python-package.yml +++ b/.github/workflows/test-python-package.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] steps: - uses: actions/checkout@v4 diff --git a/dataprofiler/tests/labelers/test_labeler_utils.py b/dataprofiler/tests/labelers/test_labeler_utils.py index f59a43e3..c14fca54 100644 --- a/dataprofiler/tests/labelers/test_labeler_utils.py +++ b/dataprofiler/tests/labelers/test_labeler_utils.py @@ -1,6 +1,6 @@ import logging +import tempfile import unittest -from unittest import mock import numpy as np import pandas as pd @@ -235,9 +235,7 @@ def test_verbose(self): self.assertIn("f1-score ", log_output) self.assertIn("F1 Score: ", log_output) - @mock.patch("dataprofiler.labelers.labeler_utils.classification_report") - @mock.patch("pandas.DataFrame") - def test_save_conf_mat(self, mock_dataframe, mock_report): + def test_save_conf_mat(self): # ideally mock out the actual contents written to file, but # would be difficult to get this completely worked out. 
@@ -248,28 +246,25 @@ def test_save_conf_mat(self, mock_dataframe, mock_report): [0, 1, 2], ] ) - expected_row_col_names = dict( - columns=["pred:PAD", "pred:UNKNOWN", "pred:OTHER"], - index=["true:PAD", "true:UNKNOWN", "true:OTHER"], - ) - mock_instance_df = mock.Mock(spec=pd.DataFrame)() - mock_dataframe.return_value = mock_instance_df - - # still omit bc confusion mat should include all despite omit - f1, f1_report = labeler_utils.evaluate_accuracy( - self.y_pred, - self.y_true, - self.num_labels, - self.reverse_label_mapping, - omitted_labels=["PAD"], - verbose=False, - confusion_matrix_file="test.csv", - ) + expected_columns = ["pred:PAD", "pred:UNKNOWN", "pred:OTHER"] + expected_index = ["true:PAD", "true:UNKNOWN", "true:OTHER"] - self.assertTrue((mock_dataframe.call_args[0][0] == expected_conf_mat).all()) - self.assertDictEqual(expected_row_col_names, mock_dataframe.call_args[1]) + with tempfile.NamedTemporaryFile() as tmpFile: + # still omit bc confusion mat should include all despite omit + f1, f1_report = labeler_utils.evaluate_accuracy( + self.y_pred, + self.y_true, + self.num_labels, + self.reverse_label_mapping, + omitted_labels=["PAD"], + verbose=False, + confusion_matrix_file=tmpFile.name, + ) - mock_instance_df.to_csv.assert_called() + df1 = pd.read_csv(tmpFile.name, index_col=0) + self.assertListEqual(list(df1.columns), expected_columns) + self.assertListEqual(list(df1.index), expected_index) + np.testing.assert_array_equal(df1.values, expected_conf_mat) class TestTFFunctions(unittest.TestCase): diff --git a/requirements-test.txt b/requirements-test.txt index 6c981cf9..725b2384 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,5 +1,5 @@ coverage>=5.0.1 -dask>=2.29.0,<2024.2.0 +dask[dask-expr,dataframe]>=2024.4.1 fsspec>=0.3.3 pytest>=6.0.1 pytest-cov>=2.8.1 diff --git a/setup.py b/setup.py index f8b5eaf8..eeca6629 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ setup( name="DataProfiler", version=__version__, - 
python_requires=">=3.8", + python_requires=">=3.9", description=DESCRIPTION, long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", diff --git a/tox.ini b/tox.ini index 38159f5f..219b2783 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py39, py310, pypi-description, manifest, precom +envlist = py39, py310, py311, pypi-description, manifest, precom [testenv]