From 35c3dc8a922593692d4209217b0ca49749907e3c Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Fri, 12 Jan 2024 09:58:49 +0100
Subject: [PATCH 1/3] Corpus - Remove attribute copying
---
orangecontrib/text/corpus.py | 4 ----
orangecontrib/text/tests/test_corpus.py | 14 --------------
2 files changed, 18 deletions(-)
diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py
index b27f096ab..2af7e5161 100644
--- a/orangecontrib/text/corpus.py
+++ b/orangecontrib/text/corpus.py
@@ -544,8 +544,6 @@ def from_table(cls, domain, source, row_indices=...):
c = super().from_table(domain, source, row_indices)
c._setup_corpus()
Corpus.retain_preprocessing(source, c, row_indices)
- # temp fix: remove when oldest Orange >= 3.34
- c.attributes = deepcopy(c.attributes)
return c
@classmethod
@@ -587,8 +585,6 @@ def from_table_rows(cls, source, row_indices):
if hasattr(source, "_titles"):
# covering case when from_table_rows called by from_table
c._titles = source._titles[row_indices]
- # temp fix: remove when oldest Orange >= 3.34
- c.attributes = deepcopy(c.attributes)
return c
@classmethod
diff --git a/orangecontrib/text/tests/test_corpus.py b/orangecontrib/text/tests/test_corpus.py
index ac9b6e7cf..42a82df92 100644
--- a/orangecontrib/text/tests/test_corpus.py
+++ b/orangecontrib/text/tests/test_corpus.py
@@ -671,20 +671,6 @@ def test_language_copied(self):
self.assertEqual(new_corpus.language, "sl")
self.assertEqual(corpus.language, "en")
- def test_remove_attributes_copy(self):
- """
- Happy new year!
-
- We added a deepcopy of attributes to from_table and from_table_rows
- since Orange didn't copy attributes. It should be removed when oldest
- supported Orange is 3.34 or higher.
- When test starts to fail:
- - remove it
- - remove copying of attributes in from_table and from_table_rows
- - update Orange version to >=3.34 if not done yet
- """
- self.assertLess(datetime.today(), datetime(2024, 1, 1))
-
def test_language_unpickle(self):
path = os.path.dirname(__file__)
file = os.path.abspath(os.path.join(path, "data", "book-excerpts.pkl"))
From 9d78e0f0e8cf61f677fed274a6e1d7651b4db1d1 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Fri, 12 Jan 2024 10:03:50 +0100
Subject: [PATCH 2/3] Use enum2int from Orange; require Orange3 >=3.35
---
orangecontrib/text/widgets/owkeywords.py | 2 +-
.../text/widgets/owscoredocuments.py | 2 +-
orangecontrib/text/widgets/utils/__init__.py | 21 -------------------
.../text/widgets/utils/tests/test_utils.py | 18 ----------------
requirements.txt | 2 +-
tox.ini | 4 ++--
6 files changed, 5 insertions(+), 44 deletions(-)
delete mode 100644 orangecontrib/text/widgets/utils/tests/test_utils.py
diff --git a/orangecontrib/text/widgets/owkeywords.py b/orangecontrib/text/widgets/owkeywords.py
index c2f6b85e6..a5df964a8 100644
--- a/orangecontrib/text/widgets/owkeywords.py
+++ b/orangecontrib/text/widgets/owkeywords.py
@@ -15,6 +15,7 @@
from Orange.widgets import gui
from Orange.widgets.settings import DomainContextHandler, ContextSetting, \
Setting
+from Orange.widgets.utils import enum2int
from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState
from Orange.widgets.utils.itemmodels import PyTableModel, TableModel
from Orange.widgets.widget import Input, Output, OWWidget, Msg
@@ -23,7 +24,6 @@
from orangecontrib.text.keywords import ScoringMethods, AggregationMethods, \
YAKE_LANGUAGE_MAPPING, RAKE_LANGUAGES
from orangecontrib.text.preprocess import BaseNormalizer
-from orangecontrib.text.widgets.utils import enum2int
from orangecontrib.text.widgets.utils.words import create_words_table, \
WORDS_COLUMN_NAME
diff --git a/orangecontrib/text/widgets/owscoredocuments.py b/orangecontrib/text/widgets/owscoredocuments.py
index 649a6d2bb..9637ba2ca 100644
--- a/orangecontrib/text/widgets/owscoredocuments.py
+++ b/orangecontrib/text/widgets/owscoredocuments.py
@@ -24,6 +24,7 @@
from Orange.data.util import get_unique_names
from Orange.util import wrap_callback
from Orange.widgets.settings import ContextSetting, PerfectDomainContextHandler, Setting
+from Orange.widgets.utils import enum2int
from Orange.widgets.utils.annotated_data import create_annotated_table, add_columns
from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState
from Orange.widgets.utils.itemmodels import PyTableModel, TableModel
@@ -36,7 +37,6 @@
from orangecontrib.text import Corpus
from orangecontrib.text.preprocess import BaseNormalizer, NGrams, BaseTokenFilter
from orangecontrib.text.vectorization.sbert import SBERT
-from orangecontrib.text.widgets.utils import enum2int
from orangecontrib.text.widgets.utils.words import create_words_table
diff --git a/orangecontrib/text/widgets/utils/__init__.py b/orangecontrib/text/widgets/utils/__init__.py
index eba92aea3..c7b2a6b05 100644
--- a/orangecontrib/text/widgets/utils/__init__.py
+++ b/orangecontrib/text/widgets/utils/__init__.py
@@ -1,24 +1,3 @@
-from enum import IntEnum, Enum
-from typing import Union
-
from .decorators import *
from .widgets import *
from .concurrent import asynchronous
-
-
-def enum2int(enum: Union[Enum, IntEnum]) -> int:
- """
- PyQt5 uses IntEnum like object for settings, for example SortOrder while
- PyQt6 uses Enum. PyQt5's IntEnum also does not support value attribute.
- This function transform both settings objects to int.
-
- Parameters
- ----------
- enum
- IntEnum like object or Enum object with Qt's settings
-
- Returns
- -------
- Settings transformed to int
- """
- return int(enum) if isinstance(enum, int) else enum.value
diff --git a/orangecontrib/text/widgets/utils/tests/test_utils.py b/orangecontrib/text/widgets/utils/tests/test_utils.py
deleted file mode 100644
index 90f100a5b..000000000
--- a/orangecontrib/text/widgets/utils/tests/test_utils.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import unittest
-from datetime import datetime
-
-
-class TestEnum2Int(unittest.TestCase):
- def test_remove_enum2int(self):
- """
- Happy new year 2024. When this test start to fail:
- - remove enum2int from orangecontrib.text.widgets.utils.__init__
- - change imports to Orange's enum2int in widget that use it
- - remove this test
- - depend orange3-text on orange 3.35
- """
- self.assertLess(datetime.today(), datetime(2024, 1, 1))
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/requirements.txt b/requirements.txt
index ccdc4fa48..36c489f1d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ lemmagen3
nltk>=3.0.5 # TweetTokenizer introduced in 3.0.5
numpy
odfpy>=1.3.5
-Orange3 >=3.34.0
+Orange3 >=3.35.0
orange-widget-base >=4.20.0
orange-canvas-core
owlready2
diff --git a/tox.ini b/tox.ini
index df23983de..1dc69c880 100644
--- a/tox.ini
+++ b/tox.ini
@@ -24,8 +24,8 @@ setenv =
deps =
{env:PYQT_PYPI_NAME:PyQt5}=={env:PYQT_PYPI_VERSION:5.15.*}
{env:WEBENGINE_PYPI_NAME:PyQtWebEngine}=={env:WEBENGINE_PYPI_VERSION:5.15.*}
- oldest: scikit-learn==1.0.1
- oldest: orange3==3.34.0
+ oldest: scikit-learn==1.1.0
+ oldest: orange3==3.35.0
oldest: orange-canvas-core==0.1.30
oldest: orange-widget-base==4.20.0
oldest: pandas==1.4.0
From f519388fd81782f4df3486f5c1452def5e7e1361 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Fri, 12 Jan 2024 10:23:08 +0100
Subject: [PATCH 3/3] Corpus - Do not copy attributes dictionary in
retain_preprocessing since it is handled by Orange
---
orangecontrib/text/corpus.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py
index 2af7e5161..f4c80577c 100644
--- a/orangecontrib/text/corpus.py
+++ b/orangecontrib/text/corpus.py
@@ -328,6 +328,7 @@ def _rename_features(additional_names: List) -> Tuple[List, List, List]:
self.metas.copy(),
self.W.copy(),
text_features=copy(self.text_features),
+ attributes=self.attributes,
)
c.name = self.name # keep corpus's name
Corpus.retain_preprocessing(self, c)
@@ -643,7 +644,6 @@ def retain_preprocessing(orig, new, key=...):
new._titles = orig._titles[key]
new.ngram_range = orig.ngram_range
- new.attributes = orig.attributes
new.used_preprocessor = orig.used_preprocessor
else: # orig is not Corpus
new._set_unique_titles()