From 35c3dc8a922593692d4209217b0ca49749907e3c Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Fri, 12 Jan 2024 09:58:49 +0100 Subject: [PATCH 1/3] Corpus - Remove attribute copying --- orangecontrib/text/corpus.py | 4 ---- orangecontrib/text/tests/test_corpus.py | 14 -------------- 2 files changed, 18 deletions(-) diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py index b27f096ab..2af7e5161 100644 --- a/orangecontrib/text/corpus.py +++ b/orangecontrib/text/corpus.py @@ -544,8 +544,6 @@ def from_table(cls, domain, source, row_indices=...): c = super().from_table(domain, source, row_indices) c._setup_corpus() Corpus.retain_preprocessing(source, c, row_indices) - # temp fix: remove when oldest Orange >= 3.34 - c.attributes = deepcopy(c.attributes) return c @classmethod @@ -587,8 +585,6 @@ def from_table_rows(cls, source, row_indices): if hasattr(source, "_titles"): # covering case when from_table_rows called by from_table c._titles = source._titles[row_indices] - # temp fix: remove when oldest Orange >= 3.34 - c.attributes = deepcopy(c.attributes) return c @classmethod diff --git a/orangecontrib/text/tests/test_corpus.py b/orangecontrib/text/tests/test_corpus.py index ac9b6e7cf..42a82df92 100644 --- a/orangecontrib/text/tests/test_corpus.py +++ b/orangecontrib/text/tests/test_corpus.py @@ -671,20 +671,6 @@ def test_language_copied(self): self.assertEqual(new_corpus.language, "sl") self.assertEqual(corpus.language, "en") - def test_remove_attributes_copy(self): - """ - Happy new year! - - We added a deepcopy of attributes to from_table and from_table_rows - since Orange didn't copy attributes. It should be removed when oldest - supported Orange is 3.34 or higher. - When test starts to fail: - - remove it - - remove copying of attributes in from_table and from_table_rows - - update Orange version to >=3.34 if not done yet - """ - self.assertLess(datetime.today(), datetime(2024, 1, 1)) - def test_language_unpickle(self): path = os.path.dirname(__file__) file = os.path.abspath(os.path.join(path, "data", "book-excerpts.pkl")) From 9d78e0f0e8cf61f677fed274a6e1d7651b4db1d1 Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Fri, 12 Jan 2024 10:03:50 +0100 Subject: [PATCH 2/3] Use enum2int from Orange --- orangecontrib/text/widgets/owkeywords.py | 2 +- .../text/widgets/owscoredocuments.py | 2 +- orangecontrib/text/widgets/utils/__init__.py | 21 ------------------- .../text/widgets/utils/tests/test_utils.py | 18 ---------------- requirements.txt | 2 +- tox.ini | 4 ++-- 6 files changed, 5 insertions(+), 44 deletions(-) delete mode 100644 orangecontrib/text/widgets/utils/tests/test_utils.py diff --git a/orangecontrib/text/widgets/owkeywords.py b/orangecontrib/text/widgets/owkeywords.py index c2f6b85e6..a5df964a8 100644 --- a/orangecontrib/text/widgets/owkeywords.py +++ b/orangecontrib/text/widgets/owkeywords.py @@ -15,6 +15,7 @@ from Orange.widgets import gui from Orange.widgets.settings import DomainContextHandler, ContextSetting, \ Setting +from Orange.widgets.utils import enum2int from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState from Orange.widgets.utils.itemmodels import PyTableModel, TableModel from Orange.widgets.widget import Input, Output, OWWidget, Msg @@ -23,7 +24,6 @@ from orangecontrib.text.keywords import ScoringMethods, AggregationMethods, \ YAKE_LANGUAGE_MAPPING, RAKE_LANGUAGES from orangecontrib.text.preprocess import BaseNormalizer -from orangecontrib.text.widgets.utils import enum2int from orangecontrib.text.widgets.utils.words import create_words_table, \ WORDS_COLUMN_NAME diff --git a/orangecontrib/text/widgets/owscoredocuments.py b/orangecontrib/text/widgets/owscoredocuments.py index 649a6d2bb..9637ba2ca 100644 --- a/orangecontrib/text/widgets/owscoredocuments.py +++ b/orangecontrib/text/widgets/owscoredocuments.py @@ -24,6 +24,7 @@ from Orange.data.util import get_unique_names from Orange.util import wrap_callback from Orange.widgets.settings import ContextSetting, PerfectDomainContextHandler, Setting +from Orange.widgets.utils import enum2int from Orange.widgets.utils.annotated_data import create_annotated_table, add_columns from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState from Orange.widgets.utils.itemmodels import PyTableModel, TableModel @@ -36,7 +37,6 @@ from orangecontrib.text import Corpus from orangecontrib.text.preprocess import BaseNormalizer, NGrams, BaseTokenFilter from orangecontrib.text.vectorization.sbert import SBERT -from orangecontrib.text.widgets.utils import enum2int from orangecontrib.text.widgets.utils.words import create_words_table diff --git a/orangecontrib/text/widgets/utils/__init__.py b/orangecontrib/text/widgets/utils/__init__.py index eba92aea3..c7b2a6b05 100644 --- a/orangecontrib/text/widgets/utils/__init__.py +++ b/orangecontrib/text/widgets/utils/__init__.py @@ -1,24 +1,3 @@ -from enum import IntEnum, Enum -from typing import Union - from .decorators import * from .widgets import * from .concurrent import asynchronous - - -def enum2int(enum: Union[Enum, IntEnum]) -> int: - """ - PyQt5 uses IntEnum like object for settings, for example SortOrder while - PyQt6 uses Enum. PyQt5's IntEnum also does not support value attribute. - This function transform both settings objects to int. - - Parameters - ---------- - enum - IntEnum like object or Enum object with Qt's settings - - Returns - ------- - Settings transformed to int - """ - return int(enum) if isinstance(enum, int) else enum.value diff --git a/orangecontrib/text/widgets/utils/tests/test_utils.py b/orangecontrib/text/widgets/utils/tests/test_utils.py deleted file mode 100644 index 90f100a5b..000000000 --- a/orangecontrib/text/widgets/utils/tests/test_utils.py +++ /dev/null @@ -1,18 +0,0 @@ -import unittest -from datetime import datetime - - -class TestEnum2Int(unittest.TestCase): - def test_remove_enum2int(self): - """ - Happy new year 2024. When this test start to fail: - - remove enum2int from orangecontrib.text.widgets.utils.__init__ - - change imports to Orange's enum2int in widget that use it - - remove this test - - depend orange3-text on orange 3.35 - """ - self.assertLess(datetime.today(), datetime(2024, 1, 1)) - - -if __name__ == "__main__": - unittest.main() diff --git a/requirements.txt b/requirements.txt index ccdc4fa48..36c489f1d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ lemmagen3 nltk>=3.0.5 # TweetTokenizer introduced in 3.0.5 numpy odfpy>=1.3.5 -Orange3 >=3.34.0 +Orange3 >=3.35.0 orange-widget-base >=4.20.0 orange-canvas-core owlready2 diff --git a/tox.ini b/tox.ini index df23983de..1dc69c880 100644 --- a/tox.ini +++ b/tox.ini @@ -24,8 +24,8 @@ setenv = deps = {env:PYQT_PYPI_NAME:PyQt5}=={env:PYQT_PYPI_VERSION:5.15.*} {env:WEBENGINE_PYPI_NAME:PyQtWebEngine}=={env:WEBENGINE_PYPI_VERSION:5.15.*} - oldest: scikit-learn==1.0.1 - oldest: orange3==3.34.0 + oldest: scikit-learn==1.1.0 + oldest: orange3==3.35.0 oldest: orange-canvas-core==0.1.30 oldest: orange-widget-base==4.20.0 oldest: pandas==1.4.0 From f519388fd81782f4df3486f5c1452def5e7e1361 Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Fri, 12 Jan 2024 10:23:08 +0100 Subject: [PATCH 3/3] Corpus - Do not copy attributes dictionary in retain_preprocessing since it is handled by Orange --- orangecontrib/text/corpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py index 2af7e5161..f4c80577c 100644 --- a/orangecontrib/text/corpus.py +++ b/orangecontrib/text/corpus.py @@ -328,6 +328,7 @@ def _rename_features(additional_names: List) -> Tuple[List, List, List]: self.metas.copy(), self.W.copy(), text_features=copy(self.text_features), + attributes=self.attributes, ) c.name = self.name # keep corpus's name Corpus.retain_preprocessing(self, c) @@ -643,7 +644,6 @@ def retain_preprocessing(orig, new, key=...): new._titles = orig._titles[key] new.ngram_range = orig.ngram_range - new.attributes = orig.attributes new.used_preprocessor = orig.used_preprocessor else: # orig is not Corpus new._set_unique_titles()