From 626f9a51a8fd1e6ecd100fb32d3df0dc6a9c350a Mon Sep 17 00:00:00 2001 From: Alexander Larin Date: Sun, 8 Dec 2019 16:35:32 +0300 Subject: [PATCH 1/2] [FIX] table_to_frame: metas lost on conversion --- Orange/data/pandas_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Orange/data/pandas_compat.py b/Orange/data/pandas_compat.py index c8aaa9c8591..e6161eaeec9 100644 --- a/Orange/data/pandas_compat.py +++ b/Orange/data/pandas_compat.py @@ -122,6 +122,6 @@ def _columns_to_series(cols, vals): if domain.metas: metas = _columns_to_series(domain.metas, tab.metas) all_series = dict(x + y + metas) - original_column_order = [var.name for var in tab.domain.variables] + original_column_order = [var.name for var in tab.domain.variables + tab.domain.metas] unsorted_columns_df = pd.DataFrame(all_series) return unsorted_columns_df[original_column_order] From 8b2a8f5e69f6c1e65d7d13f9e868c08cbed165b3 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Fri, 10 Jan 2020 11:17:58 +0100 Subject: [PATCH 2/2] table_to_frame: ensure backward compatibility --- Orange/data/pandas_compat.py | 10 ++++++++-- Orange/data/tests/test_pandas.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Orange/data/pandas_compat.py b/Orange/data/pandas_compat.py index e6161eaeec9..a15b38656ab 100644 --- a/Orange/data/pandas_compat.py +++ b/Orange/data/pandas_compat.py @@ -78,7 +78,7 @@ def _is_datetime(s): np.column_stack(M) if M else None) -def table_to_frame(tab): +def table_to_frame(tab, include_metas=False): """ Convert Orange.data.Table to pandas.DataFrame @@ -86,6 +86,9 @@ def table_to_frame(tab): ---------- tab : Table + include_metas : bool, (default=False) + Include table metas in the dataframe. 
+ Returns ------- pandas.DataFrame @@ -122,6 +125,9 @@ def _columns_to_series(cols, vals): if domain.metas: metas = _columns_to_series(domain.metas, tab.metas) all_series = dict(x + y + metas) - original_column_order = [var.name for var in tab.domain.variables + tab.domain.metas] + all_vars = tab.domain.variables + if include_metas: + all_vars += tab.domain.metas + original_column_order = [var.name for var in all_vars] unsorted_columns_df = pd.DataFrame(all_series) return unsorted_columns_df[original_column_order] diff --git a/Orange/data/tests/test_pandas.py b/Orange/data/tests/test_pandas.py index 391b3af56bc..b0e4de50f39 100644 --- a/Orange/data/tests/test_pandas.py +++ b/Orange/data/tests/test_pandas.py @@ -1,3 +1,4 @@ +# pylint: disable=import-outside-toplevel import unittest import numpy as np from Orange.data import ContinuousVariable, DiscreteVariable, TimeVariable, Table @@ -7,6 +8,7 @@ except ImportError: pd = None + @unittest.skipIf(pd is None, "Missing package 'pandas'") class TestPandasCompat(unittest.TestCase): def test_table_from_frame(self): @@ -73,6 +75,20 @@ def test_table_to_frame(self): self.assertEqual(list(df['sepal length'])[0:4], [5.1, 4.9, 4.7, 4.6]) self.assertEqual(list(df['iris'])[0:2], ['Iris-setosa', 'Iris-setosa']) + def test_table_to_frame_metas(self): + from Orange.data.pandas_compat import table_to_frame + + table = Table("zoo") + domain = table.domain + + df = table_to_frame(table) + cols = pd.Index([var.name for var in domain.variables]) + pd.testing.assert_index_equal(df.columns, cols) + + df = table_to_frame(table, include_metas=True) + cols = pd.Index([var.name for var in domain.variables + domain.metas]) + pd.testing.assert_index_equal(df.columns, cols) + @unittest.skip("Convert all Orange demo dataset. 
It takes about 5s which is way to slow") def test_table_to_frame_on_all_orange_dataset(self): from os import listdir @@ -96,3 +112,7 @@ def _get_orange_demo_datasets(): self.assertEqual(type(df), pd.DataFrame, assert_message) self.assertEqual(len(df), len(table), assert_message) self.assertEqual(len(df.columns), len(table.domain), assert_message) + + +if __name__ == "__main__": + unittest.main()