diff --git a/src/taipy/core/data/_abstract_sql.py b/src/taipy/core/data/_abstract_sql.py index f024c9ae..a41d3b0e 100644 --- a/src/taipy/core/data/_abstract_sql.py +++ b/src/taipy/core/data/_abstract_sql.py @@ -14,7 +14,7 @@ import urllib.parse from abc import abstractmethod from datetime import datetime, timedelta -from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional, Set, Tuple, Union import modin.pandas as modin_pd import numpy as np @@ -24,6 +24,7 @@ from taipy.config.common.scope import Scope from .._version._version_manager_factory import _VersionManagerFactory +from ..data.operator import JoinOperator, Operator from ..exceptions.exceptions import MissingRequiredProperty, UnknownDatabaseEngine from ._abstract_tabular import _AbstractTabularDataNode from .data_node import DataNode @@ -198,6 +199,15 @@ def _conn_string(self) -> str: raise UnknownDatabaseEngine(f"Unknown engine: {engine}") + def filter(self, operators: Optional[Union[List, Tuple]] = None, join_operator=JoinOperator.AND): + if self.properties[self.__EXPOSED_TYPE_PROPERTY] == self.__EXPOSED_TYPE_PANDAS: + return self._read_as_pandas_dataframe(operators=operators, join_operator=join_operator) + if self.properties[self.__EXPOSED_TYPE_PROPERTY] == self.__EXPOSED_TYPE_MODIN: + return self._read_as_modin_dataframe(operators=operators, join_operator=join_operator) + if self.properties[self.__EXPOSED_TYPE_PROPERTY] == self.__EXPOSED_TYPE_NUMPY: + return self._read_as_numpy(operators=operators, join_operator=join_operator) + return self._read_as(operators=operators, join_operator=join_operator) + def _read(self): if self.properties[self.__EXPOSED_TYPE_PROPERTY] == self.__EXPOSED_TYPE_PANDAS: return self._read_as_pandas_dataframe() @@ -207,32 +217,76 @@ def _read(self): return self._read_as_numpy() return self._read_as() - def _read_as(self): + def _read_as(self, operators: Optional[Union[List, Tuple]] = None, join_operator=JoinOperator.AND): custom_class = self.properties[self.__EXPOSED_TYPE_PROPERTY] with self._get_engine().connect() as connection: - query_result = connection.execute(text(self._get_read_query())) + query_result = connection.execute(text(self._get_read_query(operators, join_operator))) return list(map(lambda row: custom_class(**row), query_result)) - def _read_as_numpy(self) -> np.ndarray: - return self._read_as_pandas_dataframe().to_numpy() + def _read_as_numpy( + self, operators: Optional[Union[List, Tuple]] = None, join_operator=JoinOperator.AND + ) -> np.ndarray: + return self._read_as_pandas_dataframe(operators=operators, join_operator=join_operator).to_numpy() - def _read_as_pandas_dataframe(self, columns: Optional[List[str]] = None): + def _read_as_pandas_dataframe( + self, + columns: Optional[List[str]] = None, + operators: Optional[Union[List, Tuple]] = None, + join_operator=JoinOperator.AND, + ): with self._get_engine().connect() as conn: if columns: - return pd.DataFrame(conn.execute(text(self._get_read_query())))[columns] - return pd.DataFrame(conn.execute(text(self._get_read_query()))) + return pd.DataFrame(conn.execute(text(self._get_read_query(operators, join_operator))))[columns] + return pd.DataFrame(conn.execute(text(self._get_read_query(operators, join_operator)))) - def _read_as_modin_dataframe(self, columns: Optional[List[str]] = None): + def _read_as_modin_dataframe( + self, + columns: Optional[List[str]] = None, + operators: Optional[Union[List, Tuple]] = None, + join_operator=JoinOperator.AND, + ): if columns: - return 
modin_pd.read_sql_query(self._get_read_query(), con=self._get_engine())[columns]
-        return modin_pd.read_sql_query(self._get_read_query(), con=self._get_engine())
+            return modin_pd.read_sql_query(self._get_read_query(operators, join_operator), con=self._get_engine())[
+                columns
+            ]
+        return modin_pd.read_sql_query(self._get_read_query(operators, join_operator), con=self._get_engine())
 
-    @abstractmethod
-    def _get_read_query(self):
-        raise NotImplementedError
+    def _get_read_query(self, operators: Optional[Union[List, Tuple]] = None, join_operator=JoinOperator.AND):
+        query = self._get_base_read_query()
+
+        if not operators:
+            return query
+
+        if not isinstance(operators, List):
+            operators = [operators]
+
+        conditions = []
+        for key, value, operator in operators:
+            if operator == Operator.EQUAL:
+                conditions.append(f"{key} = '{value}'")
+            elif operator == Operator.NOT_EQUAL:
+                conditions.append(f"{key} <> '{value}'")
+            elif operator == Operator.GREATER_THAN:
+                conditions.append(f"{key} > '{value}'")
+            elif operator == Operator.GREATER_OR_EQUAL:
+                conditions.append(f"{key} >= '{value}'")
+            elif operator == Operator.LESS_THAN:
+                conditions.append(f"{key} < '{value}'")
+            elif operator == Operator.LESS_OR_EQUAL:
+                conditions.append(f"{key} <= '{value}'")
+
+        if join_operator == JoinOperator.AND:
+            query += f" WHERE {' AND '.join(conditions)}"
+        elif join_operator == JoinOperator.OR:
+            query += f" WHERE {' OR '.join(conditions)}"
+        else:
+            raise NotImplementedError(f"Join operator {join_operator} not implemented.")
+
+        return query
 
     @abstractmethod
-    def _do_write(self, data, engine, connection) -> None:
+    def _get_base_read_query(self) -> str:
         raise NotImplementedError
 
     def _write(self, data) -> None:
@@ -248,6 +302,10 @@ def _write(self, data) -> None:
         else:
             transaction.commit()
 
+    @abstractmethod
+    def _do_write(self, data, engine, connection) -> None:
+        raise NotImplementedError
+
     def __setattr__(self, key: str, value) -> None:
         if key in self.__ENGINE_PROPERTIES:
             self._engine = None
diff --git a/src/taipy/core/data/data_node.py b/src/taipy/core/data/data_node.py
index 8a98160d..72a86ce2 100644
--- a/src/taipy/core/data/data_node.py
+++ b/src/taipy/core/data/data_node.py
@@ -15,10 +15,7 @@
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
-import modin.pandas as modin_pd
 import networkx as nx
-import numpy as np
-import pandas as pd
 
 from taipy.config.common._validate_id import _validate_id
 from taipy.config.common.scope import Scope
diff --git a/src/taipy/core/data/mongo.py b/src/taipy/core/data/mongo.py
index 769c609e..2ffa2aa0 100644
--- a/src/taipy/core/data/mongo.py
+++ b/src/taipy/core/data/mongo.py
@@ -11,12 +11,13 @@
 
 from datetime import datetime, timedelta
 from inspect import isclass
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
 from taipy.config.common.scope import Scope
 
 from .._version._version_manager_factory import _VersionManagerFactory
 from ..common._mongo_connector import _connect_mongodb
+from ..data.operator import JoinOperator, Operator
 from ..exceptions.exceptions import InvalidCustomDocument, MissingRequiredProperty
 from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
@@ -175,19 +176,49 @@ def _check_custom_document(self, custom_document):
     def storage_type(cls) -> str:
         return cls.__STORAGE_TYPE
 
+    def filter(self, operators: Optional[Union[List, Tuple]] = None, join_operator=JoinOperator.AND):
+        cursor = self._read_by_query(operators, join_operator)
+        return list(map(lambda row: self._decoder(row), cursor))
+
     def _read(self):
         cursor = self._read_by_query()
         return list(map(lambda row: self._decoder(row), cursor))
 
-    def _read_by_query(self):
+    def _read_by_query(self, operators: Optional[Union[List, Tuple]] = None, join_operator=JoinOperator.AND):
         """Query from a Mongo collection, exclude the _id field"""
+        if not operators:
+            return self.collection.find()
+
+        if not isinstance(operators, List):
+            operators = [operators]
+
+        conditions = []
+        for key, value, operator in operators:
+            if operator == Operator.EQUAL:
+                conditions.append({key: value})
+            elif operator == Operator.NOT_EQUAL:
+                conditions.append({key: {"$ne": value}})
+            elif operator == Operator.GREATER_THAN:
+                conditions.append({key: {"$gt": value}})
+            elif operator == Operator.GREATER_OR_EQUAL:
+                conditions.append({key: {"$gte": value}})
+            elif operator == Operator.LESS_THAN:
+                conditions.append({key: {"$lt": value}})
+            elif operator == Operator.LESS_OR_EQUAL:
+                conditions.append({key: {"$lte": value}})
+
+        query = {}
+        if join_operator == JoinOperator.AND:
+            query = {"$and": conditions}
+        elif join_operator == JoinOperator.OR:
+            query = {"$or": conditions}
+        else:
+            raise NotImplementedError(f"Join operator {join_operator} is not supported.")
 
-        return self.collection.find()
+        return self.collection.find(query)
 
     def _write(self, data) -> None:
         """Check data against a collection of types to handle insertion on the database."""
-
         if not isinstance(data, list):
             data = [data]
diff --git a/src/taipy/core/data/sql.py b/src/taipy/core/data/sql.py
index 3f368de4..0e9dbc72 100644
--- a/src/taipy/core/data/sql.py
+++ b/src/taipy/core/data/sql.py
@@ -123,7 +123,7 @@ def __init__(
     def storage_type(cls) -> str:
         return cls.__STORAGE_TYPE
 
-    def _get_read_query(self):
+    def _get_base_read_query(self) -> str:
         return self.properties.get(self.__READ_QUERY_KEY)
 
     def _do_write(self, data, engine, connection) -> None:
diff --git a/src/taipy/core/data/sql_table.py b/src/taipy/core/data/sql_table.py
index d2fa86a1..b736243a 100644
--- a/src/taipy/core/data/sql_table.py
+++ b/src/taipy/core/data/sql_table.py
@@ -113,7 +113,7 @@ def __init__(
     def storage_type(cls) -> str:
         return cls.__STORAGE_TYPE
 
-    def _get_read_query(self):
+    def _get_base_read_query(self) -> str:
         return f"SELECT * FROM {self.properties[self.__TABLE_KEY]}"
 
     def _do_write(self, data, engine, connection) -> None:
diff --git a/tests/conftest.py b/tests/conftest.py
index dff4481c..a919bd5d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -144,9 +144,9 @@ def tmp_sqlite_db_file_path(tmpdir_factory):
     file_extension = ".db"
     db = create_engine("sqlite:///" + os.path.join(fn.strpath, f"{db_name}{file_extension}"))
     conn = db.connect()
-    conn.execute(text("CREATE TABLE foo (foo int, bar int);"))
-    conn.execute(text("INSERT INTO foo (foo, bar) VALUES (1, 2);"))
-    conn.execute(text("INSERT INTO foo (foo, bar) VALUES (3, 4);"))
+    conn.execute(text("CREATE TABLE example (foo int, bar int);"))
+    conn.execute(text("INSERT INTO example (foo, bar) VALUES (1, 2);"))
+    conn.execute(text("INSERT INTO example (foo, bar) VALUES (3, 4);"))
     conn.commit()
     conn.close()
     db.dispose()
@@ -162,9 +162,9 @@ def tmp_sqlite_sqlite3_file_path(tmpdir_factory):
     db = create_engine("sqlite:///" + os.path.join(fn.strpath, f"{db_name}{file_extension}"))
     conn = db.connect()
-    conn.execute(text("CREATE TABLE foo (foo int, bar int);"))
-    conn.execute(text("INSERT INTO foo (foo, bar) VALUES (1, 2);"))
-    conn.execute(text("INSERT INTO foo (foo, bar) VALUES (3, 4);"))
+    conn.execute(text("CREATE TABLE example (foo int, bar int);"))
+    conn.execute(text("INSERT INTO example (foo, bar) VALUES (1, 2);"))
+    conn.execute(text("INSERT INTO example (foo, bar) VALUES (3, 4);"))
     conn.commit()
     conn.close()
     db.dispose()
diff --git a/tests/core/data/test_mongo_data_node.py b/tests/core/data/test_mongo_data_node.py
index 75fa8d1b..e1420915 100644
--- a/tests/core/data/test_mongo_data_node.py
+++ b/tests/core/data/test_mongo_data_node.py
@@ -12,6 +12,7 @@
 
 from dataclasses import dataclass
 from datetime import datetime
+from unittest.mock import patch
 
 import mongomock
 import pymongo
@@ -339,3 +340,16 @@ def test_filter(self, properties):
             {"bar": 2},
             {},
         ]
+
+    @mongomock.patch(servers=(("localhost", 27017),))
+    @pytest.mark.parametrize("properties", __properties)
+    def test_filter_does_not_read_all_entities(self, properties):
+        mongo_dn = MongoCollectionDataNode("foo", Scope.SCENARIO, properties=properties)
+
+        # MongoCollectionDataNode.filter() should not call the MongoCollectionDataNode._read() method
+        with patch.object(MongoCollectionDataNode, "_read") as read_mock:
+            mongo_dn.filter(("foo", 1, Operator.EQUAL))
+            mongo_dn.filter(("bar", 2, Operator.NOT_EQUAL))
+            mongo_dn.filter([("bar", 1, Operator.EQUAL), ("bar", 2, Operator.EQUAL)], JoinOperator.OR)
+
+        read_mock.assert_not_called()
diff --git a/tests/core/data/test_sql_data_node.py b/tests/core/data/test_sql_data_node.py
index 11c220ce..d1608edf 100644
--- a/tests/core/data/test_sql_data_node.py
+++ b/tests/core/data/test_sql_data_node.py
@@ -10,7 +10,7 @@
 # specific language governing permissions and limitations under the License.
 
 from importlib import util
-from unittest import mock
+from unittest.mock import patch
 
 import modin.pandas as modin_pd
 import numpy as np
@@ -36,16 +36,16 @@ def __init__(self, foo=None, bar=None, *args, **kwargs):
 
 def my_write_query_builder_with_pandas(data: pd.DataFrame):
     insert_data = data.to_dict("records")
-    return ["DELETE FROM foo", ("INSERT INTO foo VALUES (:foo, :bar)", insert_data)]
+    return ["DELETE FROM example", ("INSERT INTO example VALUES (:foo, :bar)", insert_data)]
 
 
 def my_write_query_builder_with_modin(data: modin_pd.DataFrame):
     insert_data = data.to_dict("records")
-    return ["DELETE FROM foo", ("INSERT INTO foo VALUES (:foo, :bar)", insert_data)]
+    return ["DELETE FROM example", ("INSERT INTO example VALUES (:foo, :bar)", insert_data)]
 
 
 def single_write_query_builder(data):
-    return "DELETE FROM foo"
+    return "DELETE FROM example"
 
 
 class TestSQLDataNode:
@@ -53,7 +53,7 @@ class TestSQLDataNode:
         {
             "db_name": "taipy.sqlite3",
            "db_engine": "sqlite",
-            "read_query": "SELECT * FROM foo",
+            "read_query": "SELECT * FROM example",
             "write_query_builder": my_write_query_builder_with_pandas,
             "db_extra_args": {
                 "TrustServerCertificate": "yes",
@@ -66,7 +66,7 @@ class TestSQLDataNode:
         {
             "db_name": "taipy.sqlite3",
             "db_engine": "sqlite",
-            "read_query": "SELECT * FROM foo",
+            "read_query": "SELECT * FROM example",
             "write_query_builder": my_write_query_builder_with_modin,
             "exposed_type": "modin",
             "db_extra_args": {
@@ -83,7 +83,7 @@ class TestSQLDataNode:
             "db_password": "Passw0rd",
             "db_name": "taipy",
             "db_engine": "mssql",
-            "read_query": "SELECT * FROM foo",
+            "read_query": "SELECT * FROM example",
             "write_query_builder": my_write_query_builder_with_pandas,
             "db_extra_args": {
                 "TrustServerCertificate": "yes",
@@ -96,7 +96,7 @@ class TestSQLDataNode:
             "db_username": "sa",
             "db_password": "Passw0rd",
             "db_name": "taipy",
             "db_engine": "mssql",
-            "read_query": "SELECT * FROM foo",
+            
"read_query": "SELECT * FROM example", "write_query_builder": my_write_query_builder_with_modin, "exposed_type": "modin", "db_extra_args": { @@ -112,7 +112,7 @@ class TestSQLDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "mysql", - "read_query": "SELECT * FROM foo", + "read_query": "SELECT * FROM example", "write_query_builder": my_write_query_builder_with_pandas, "db_extra_args": { "TrustServerCertificate": "yes", @@ -125,7 +125,7 @@ class TestSQLDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "mysql", - "read_query": "SELECT * FROM foo", + "read_query": "SELECT * FROM example", "write_query_builder": my_write_query_builder_with_modin, "exposed_type": "modin", "db_extra_args": { @@ -141,7 +141,7 @@ class TestSQLDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "postgresql", - "read_query": "SELECT * FROM foo", + "read_query": "SELECT * FROM example", "write_query_builder": my_write_query_builder_with_pandas, "db_extra_args": { "TrustServerCertificate": "yes", @@ -154,7 +154,7 @@ class TestSQLDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "postgresql", - "read_query": "SELECT * FROM foo", + "read_query": "SELECT * FROM example", "write_query_builder": my_write_query_builder_with_modin, "exposed_type": "modin", "db_extra_args": { @@ -180,7 +180,7 @@ def test_create(self, pandas_properties, modin_properties): assert dn.job_ids == [] assert dn.is_ready_for_reading assert dn.exposed_type == "pandas" - assert dn.read_query == "SELECT * FROM foo" + assert dn.read_query == "SELECT * FROM example" assert dn.write_query_builder == my_write_query_builder_with_pandas dn = SQLDataNode( @@ -197,7 +197,7 @@ def test_create(self, pandas_properties, modin_properties): assert dn.job_ids == [] assert dn.is_ready_for_reading assert dn.exposed_type == "modin" - assert dn.read_query == "SELECT * FROM foo" + assert dn.read_query == "SELECT * FROM example" assert dn.write_query_builder == my_write_query_builder_with_modin @pytest.mark.parametrize("properties", __pandas_properties + __modin_properties) @@ -236,13 +236,13 @@ def test_write_query_builder(self, pandas_properties, modin_properties): custom_properties = pandas_properties.copy() custom_properties.pop("db_extra_args") dn = SQLDataNode("foo_bar", Scope.SCENARIO, properties=custom_properties) - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock: + with patch("sqlalchemy.engine.Engine.connect") as engine_mock: # mock connection execute dn.write(pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})) assert len(engine_mock.mock_calls[4].args) == 1 - assert engine_mock.mock_calls[4].args[0].text == "DELETE FROM foo" + assert engine_mock.mock_calls[4].args[0].text == "DELETE FROM example" assert len(engine_mock.mock_calls[5].args) == 2 - assert engine_mock.mock_calls[5].args[0].text == "INSERT INTO foo VALUES (:foo, :bar)" + assert engine_mock.mock_calls[5].args[0].text == "INSERT INTO example VALUES (:foo, :bar)" assert engine_mock.mock_calls[5].args[1] == [ {"foo": 1, "bar": 4}, {"foo": 2, "bar": 5}, @@ -252,22 +252,22 @@ def test_write_query_builder(self, pandas_properties, modin_properties): custom_properties["write_query_builder"] = single_write_query_builder dn = SQLDataNode("foo_bar", Scope.SCENARIO, properties=custom_properties) - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock: + with patch("sqlalchemy.engine.Engine.connect") as engine_mock: # mock connection execute dn.write(pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})) assert 
len(engine_mock.mock_calls[4].args) == 1 - assert engine_mock.mock_calls[4].args[0].text == "DELETE FROM foo" + assert engine_mock.mock_calls[4].args[0].text == "DELETE FROM example" custom_properties = modin_properties.copy() custom_properties.pop("db_extra_args") dn = SQLDataNode("foo_bar", Scope.SCENARIO, properties=custom_properties) - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock: + with patch("sqlalchemy.engine.Engine.connect") as engine_mock: # mock connection execute dn.write(modin_pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})) assert len(engine_mock.mock_calls[4].args) == 1 - assert engine_mock.mock_calls[4].args[0].text == "DELETE FROM foo" + assert engine_mock.mock_calls[4].args[0].text == "DELETE FROM example" assert len(engine_mock.mock_calls[5].args) == 2 - assert engine_mock.mock_calls[5].args[0].text == "INSERT INTO foo VALUES (:foo, :bar)" + assert engine_mock.mock_calls[5].args[0].text == "INSERT INTO example VALUES (:foo, :bar)" assert engine_mock.mock_calls[5].args[1] == [ {"foo": 1, "bar": 4}, {"foo": 2, "bar": 5}, @@ -277,11 +277,11 @@ def test_write_query_builder(self, pandas_properties, modin_properties): custom_properties["write_query_builder"] = single_write_query_builder dn = SQLDataNode("foo_bar", Scope.SCENARIO, properties=custom_properties) - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock: + with patch("sqlalchemy.engine.Engine.connect") as engine_mock: # mock connection execute dn.write(modin_pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})) assert len(engine_mock.mock_calls[4].args) == 1 - assert engine_mock.mock_calls[4].args[0].text == "DELETE FROM foo" + assert engine_mock.mock_calls[4].args[0].text == "DELETE FROM example" @pytest.mark.parametrize( "tmp_sqlite_path", @@ -295,7 +295,7 @@ def test_sqlite_read_file_with_different_extension(self, tmp_sqlite_path, reques folder_path, db_name, file_extension = tmp_sqlite_path properties = { "db_engine": "sqlite", - "read_query": "SELECT * from foo", + "read_query": "SELECT * from example", "write_query_builder": single_write_query_builder, "db_name": db_name, "sqlite_folder_path": folder_path, @@ -310,7 +310,7 @@ def test_filter_pandas_exposed_type(self, tmp_sqlite_sqlite3_file_path): folder_path, db_name, file_extension = tmp_sqlite_sqlite3_file_path properties = { "db_engine": "sqlite", - "read_query": "SELECT * FROM foo", + "read_query": "SELECT * FROM example", "write_query_builder": my_write_query_builder_with_pandas, "db_name": db_name, "sqlite_folder_path": folder_path, @@ -323,34 +323,29 @@ def test_filter_pandas_exposed_type(self, tmp_sqlite_sqlite3_file_path): [ {"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}, - {"foo": 1}, + {"foo": 1, "bar": 3}, + {"foo": 2, "bar": 1}, {"foo": 2, "bar": 2}, - {"bar": 2}, + {"foo": 2, "bar": 3}, ] ) ) # Test datanode indexing and slicing - assert dn["foo"].equals(pd.Series([1, 1, 1, 2, None])) - assert dn["bar"].equals(pd.Series([1, 2, None, 2, 2])) - assert dn[:2].equals(pd.DataFrame([{"foo": 1.0, "bar": 1.0}, {"foo": 1.0, "bar": 2.0}])) + assert dn["foo"].equals(pd.Series([1, 1, 1, 2, 2, 2])) + assert dn["bar"].equals(pd.Series([1, 2, 3, 1, 2, 3])) + assert dn[:2].equals(pd.DataFrame([{"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}])) # Test filter data filtered_by_filter_method = dn.filter(("foo", 1, Operator.EQUAL)) filtered_by_indexing = dn[dn["foo"] == 1] - expected_data = pd.DataFrame([{"foo": 1.0, "bar": 1.0}, {"foo": 1.0, "bar": 2.0}, {"foo": 1.0}]) + expected_data = pd.DataFrame([{"foo": 1, "bar": 1}, {"foo": 1, "bar": 
2}, {"foo": 1, "bar": 3}]) assert_frame_equal(filtered_by_filter_method.reset_index(drop=True), expected_data) assert_frame_equal(filtered_by_indexing.reset_index(drop=True), expected_data) filtered_by_filter_method = dn.filter(("foo", 1, Operator.NOT_EQUAL)) filtered_by_indexing = dn[dn["foo"] != 1] - expected_data = pd.DataFrame([{"foo": 2.0, "bar": 2.0}, {"bar": 2.0}]) - assert_frame_equal(filtered_by_filter_method.reset_index(drop=True), expected_data) - assert_frame_equal(filtered_by_indexing.reset_index(drop=True), expected_data) - - filtered_by_filter_method = dn.filter(("bar", 2, Operator.EQUAL)) - filtered_by_indexing = dn[dn["bar"] == 2] - expected_data = pd.DataFrame([{"foo": 1.0, "bar": 2.0}, {"foo": 2.0, "bar": 2.0}, {"bar": 2.0}]) + expected_data = pd.DataFrame([{"foo": 2, "bar": 1}, {"foo": 2, "bar": 2}, {"foo": 2, "bar": 3}]) assert_frame_equal(filtered_by_filter_method.reset_index(drop=True), expected_data) assert_frame_equal(filtered_by_indexing.reset_index(drop=True), expected_data) @@ -358,10 +353,10 @@ def test_filter_pandas_exposed_type(self, tmp_sqlite_sqlite3_file_path): filtered_by_indexing = dn[(dn["bar"] == 1) | (dn["bar"] == 2)] expected_data = pd.DataFrame( [ - {"foo": 1.0, "bar": 1.0}, - {"foo": 1.0, "bar": 2.0}, - {"foo": 2.0, "bar": 2.0}, - {"bar": 2.0}, + {"foo": 1, "bar": 1}, + {"foo": 1, "bar": 2}, + {"foo": 2, "bar": 1}, + {"foo": 2, "bar": 2}, ] ) assert_frame_equal(filtered_by_filter_method.reset_index(drop=True), expected_data) @@ -371,7 +366,7 @@ def test_filter_modin_exposed_type(self, tmp_sqlite_sqlite3_file_path): folder_path, db_name, file_extension = tmp_sqlite_sqlite3_file_path properties = { "db_engine": "sqlite", - "read_query": "SELECT * FROM foo", + "read_query": "SELECT * FROM example", "write_query_builder": my_write_query_builder_with_modin, "db_name": db_name, "sqlite_folder_path": folder_path, @@ -384,34 +379,29 @@ def test_filter_modin_exposed_type(self, tmp_sqlite_sqlite3_file_path): [ {"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}, - {"foo": 1}, + {"foo": 1, "bar": 3}, + {"foo": 2, "bar": 1}, {"foo": 2, "bar": 2}, - {"bar": 2}, + {"foo": 2, "bar": 3}, ] ) ) # Test datanode indexing and slicing - assert dn["foo"].equals(modin_pd.Series([1, 1, 1, 2, None])) - assert dn["bar"].equals(modin_pd.Series([1, 2, None, 2, 2])) - assert dn[:2].equals(modin_pd.DataFrame([{"foo": 1.0, "bar": 1.0}, {"foo": 1.0, "bar": 2.0}])) + assert dn["foo"].equals(pd.Series([1, 1, 1, 2, 2, 2])) + assert dn["bar"].equals(pd.Series([1, 2, 3, 1, 2, 3])) + assert dn[:2].equals(modin_pd.DataFrame([{"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}])) # Test filter data filtered_by_filter_method = dn.filter(("foo", 1, Operator.EQUAL)) filtered_by_indexing = dn[dn["foo"] == 1] - expected_data = modin_pd.DataFrame([{"foo": 1.0, "bar": 1.0}, {"foo": 1.0, "bar": 2.0}, {"foo": 1.0}]) + expected_data = modin_pd.DataFrame([{"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}, {"foo": 1, "bar": 3}]) df_equals(filtered_by_filter_method.reset_index(drop=True), expected_data) df_equals(filtered_by_indexing.reset_index(drop=True), expected_data) filtered_by_filter_method = dn.filter(("foo", 1, Operator.NOT_EQUAL)) filtered_by_indexing = dn[dn["foo"] != 1] - expected_data = modin_pd.DataFrame([{"foo": 2.0, "bar": 2.0}, {"bar": 2.0}]) - df_equals(filtered_by_filter_method.reset_index(drop=True), expected_data) - df_equals(filtered_by_indexing.reset_index(drop=True), expected_data) - - filtered_by_filter_method = dn.filter(("bar", 2, Operator.EQUAL)) - filtered_by_indexing = dn[dn["bar"] == 2] - 
expected_data = modin_pd.DataFrame([{"foo": 1.0, "bar": 2.0}, {"foo": 2.0, "bar": 2.0}, {"bar": 2.0}]) + expected_data = modin_pd.DataFrame([{"foo": 2, "bar": 1}, {"foo": 2, "bar": 2}, {"foo": 2, "bar": 3}]) df_equals(filtered_by_filter_method.reset_index(drop=True), expected_data) df_equals(filtered_by_indexing.reset_index(drop=True), expected_data) @@ -419,10 +409,10 @@ def test_filter_modin_exposed_type(self, tmp_sqlite_sqlite3_file_path): filtered_by_indexing = dn[(dn["bar"] == 1) | (dn["bar"] == 2)] expected_data = modin_pd.DataFrame( [ - {"foo": 1.0, "bar": 1.0}, - {"foo": 1.0, "bar": 2.0}, - {"foo": 2.0, "bar": 2.0}, - {"bar": 2.0}, + {"foo": 1, "bar": 1}, + {"foo": 1, "bar": 2}, + {"foo": 2, "bar": 1}, + {"foo": 2, "bar": 2}, ] ) df_equals(filtered_by_filter_method.reset_index(drop=True), expected_data) @@ -432,7 +422,7 @@ def test_filter_numpy_exposed_type(self, tmp_sqlite_sqlite3_file_path): folder_path, db_name, file_extension = tmp_sqlite_sqlite3_file_path properties = { "db_engine": "sqlite", - "read_query": "SELECT * FROM foo", + "read_query": "SELECT * FROM example", "write_query_builder": my_write_query_builder_with_pandas, "db_name": db_name, "sqlite_folder_path": folder_path, @@ -461,17 +451,38 @@ def test_filter_numpy_exposed_type(self, tmp_sqlite_sqlite3_file_path): assert np.array_equal(dn[1:4, :1], np.array([[1], [1], [2]])) # Test filter data - assert np.array_equal(dn.filter((0, 1, Operator.EQUAL)), np.array([[1, 1], [1, 2], [1, 3]])) + assert np.array_equal(dn.filter(("foo", 1, Operator.EQUAL)), np.array([[1, 1], [1, 2], [1, 3]])) assert np.array_equal(dn[dn[:, 0] == 1], np.array([[1, 1], [1, 2], [1, 3]])) - assert np.array_equal(dn.filter((0, 1, Operator.NOT_EQUAL)), np.array([[2, 1], [2, 2], [2, 3]])) + assert np.array_equal(dn.filter(("foo", 1, Operator.NOT_EQUAL)), np.array([[2, 1], [2, 2], [2, 3]])) assert np.array_equal(dn[dn[:, 0] != 1], np.array([[2, 1], [2, 2], [2, 3]])) - assert np.array_equal(dn.filter((1, 2, Operator.EQUAL)), np.array([[1, 2], [2, 2]])) + assert np.array_equal(dn.filter(("bar", 2, Operator.EQUAL)), np.array([[1, 2], [2, 2]])) assert np.array_equal(dn[dn[:, 1] == 2], np.array([[1, 2], [2, 2]])) assert np.array_equal( - dn.filter([(1, 1, Operator.EQUAL), (1, 2, Operator.EQUAL)], JoinOperator.OR), + dn.filter([("bar", 1, Operator.EQUAL), ("bar", 2, Operator.EQUAL)], JoinOperator.OR), np.array([[1, 1], [1, 2], [2, 1], [2, 2]]), ) assert np.array_equal(dn[(dn[:, 1] == 1) | (dn[:, 1] == 2)], np.array([[1, 1], [1, 2], [2, 1], [2, 2]])) + + def test_filter_does_not_read_all_entities(self, tmp_sqlite_sqlite3_file_path): + folder_path, db_name, file_extension = tmp_sqlite_sqlite3_file_path + properties = { + "db_engine": "sqlite", + "read_query": "SELECT * FROM example", + "write_query_builder": my_write_query_builder_with_pandas, + "db_name": db_name, + "sqlite_folder_path": folder_path, + "sqlite_file_extension": file_extension, + "exposed_type": "numpy", + } + dn = SQLDataNode("foo", Scope.SCENARIO, properties=properties) + + # SQLDataNode.filter() should not call the MongoCollectionDataNode._read() method + with patch.object(SQLDataNode, "_read") as read_mock: + dn.filter(("foo", 1, Operator.EQUAL)) + dn.filter(("bar", 2, Operator.NOT_EQUAL)) + dn.filter([("bar", 1, Operator.EQUAL), ("bar", 2, Operator.EQUAL)], JoinOperator.OR) + + assert read_mock["_read"].call_count == 0 diff --git a/tests/core/data/test_sql_table_data_node.py b/tests/core/data/test_sql_table_data_node.py index cb211b54..4720b08f 100644 --- 
a/tests/core/data/test_sql_table_data_node.py +++ b/tests/core/data/test_sql_table_data_node.py @@ -10,7 +10,7 @@ # specific language governing permissions and limitations under the License. from importlib import util -from unittest import mock +from unittest.mock import patch import modin.pandas as modin_pd import numpy as np @@ -39,7 +39,7 @@ class TestSQLTableDataNode: { "db_name": "taipy", "db_engine": "sqlite", - "table_name": "foo", + "table_name": "example", "db_extra_args": { "TrustServerCertificate": "yes", "other": "value", @@ -51,7 +51,7 @@ class TestSQLTableDataNode: { "db_name": "taipy", "db_engine": "sqlite", - "table_name": "foo", + "table_name": "example", "exposed_type": "modin", "db_extra_args": { "TrustServerCertificate": "yes", @@ -67,7 +67,7 @@ class TestSQLTableDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "mssql", - "table_name": "foo", + "table_name": "example", "db_extra_args": { "TrustServerCertificate": "yes", }, @@ -79,7 +79,7 @@ class TestSQLTableDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "mssql", - "table_name": "foo", + "table_name": "example", "exposed_type": "modin", "db_extra_args": { "TrustServerCertificate": "yes", @@ -94,7 +94,7 @@ class TestSQLTableDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "mysql", - "table_name": "foo", + "table_name": "example", "db_extra_args": { "TrustServerCertificate": "yes", }, @@ -106,7 +106,7 @@ class TestSQLTableDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "mysql", - "table_name": "foo", + "table_name": "example", "exposed_type": "modin", "db_extra_args": { "TrustServerCertificate": "yes", @@ -121,7 +121,7 @@ class TestSQLTableDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "postgresql", - "table_name": "foo", + "table_name": "example", "db_extra_args": { "TrustServerCertificate": "yes", }, @@ -133,7 +133,7 @@ class TestSQLTableDataNode: "db_password": "Passw0rd", "db_name": "taipy", "db_engine": "postgresql", - "table_name": "foo", + "table_name": "example", "exposed_type": "modin", "db_extra_args": { "TrustServerCertificate": "yes", @@ -158,8 +158,8 @@ def test_create(self, pandas_properties, modin_properties): assert dn.job_ids == [] assert dn.is_ready_for_reading assert dn.exposed_type == "pandas" - assert dn.table_name == "foo" - assert dn._get_read_query() == "SELECT * FROM foo" + assert dn.table_name == "example" + assert dn._get_base_read_query() == "SELECT * FROM example" dn = SQLTableDataNode( "foo_bar", @@ -175,8 +175,8 @@ def test_create(self, pandas_properties, modin_properties): assert dn.job_ids == [] assert dn.is_ready_for_reading assert dn.exposed_type == "modin" - assert dn.table_name == "foo" - assert dn._get_read_query() == "SELECT * FROM foo" + assert dn.table_name == "example" + assert dn._get_base_read_query() == "SELECT * FROM example" @pytest.mark.parametrize("properties", __pandas_properties) def test_get_user_properties(self, properties): @@ -204,10 +204,10 @@ def test_create_with_missing_parameters(self, properties): with pytest.raises(MissingRequiredProperty): SQLTableDataNode("foo", Scope.SCENARIO, DataNodeId("dn_id"), properties=properties) - @mock.patch("src.taipy.core.data.sql_table.SQLTableDataNode._read_as", return_value="custom") - @mock.patch("src.taipy.core.data.sql_table.SQLTableDataNode._read_as_pandas_dataframe", return_value="pandas") - @mock.patch("src.taipy.core.data.sql_table.SQLTableDataNode._read_as_modin_dataframe", return_value="modin") - 
@mock.patch("src.taipy.core.data.sql_table.SQLTableDataNode._read_as_numpy", return_value="numpy") + @patch("src.taipy.core.data.sql_table.SQLTableDataNode._read_as", return_value="custom") + @patch("src.taipy.core.data.sql_table.SQLTableDataNode._read_as_pandas_dataframe", return_value="pandas") + @patch("src.taipy.core.data.sql_table.SQLTableDataNode._read_as_modin_dataframe", return_value="modin") + @patch("src.taipy.core.data.sql_table.SQLTableDataNode._read_as_numpy", return_value="numpy") @pytest.mark.parametrize("pandas_properties", __pandas_properties) @pytest.mark.parametrize("modin_properties", __modin_properties) def test_read( @@ -254,7 +254,7 @@ def test_read_as(self, pandas_properties): custom_properties["exposed_type"] = MyCustomObject sql_data_node = SQLTableDataNode("foo", Scope.SCENARIO, properties=custom_properties) - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock: + with patch("sqlalchemy.engine.Engine.connect") as engine_mock: cursor_mock = engine_mock.return_value.__enter__.return_value cursor_mock.execute.return_value = [ {"foo": "baz", "bar": "qux"}, @@ -290,7 +290,7 @@ def test_read_as(self, pandas_properties): assert len(data[5].args) == 0 assert len(data[5].kwargs) == 0 - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock: + with patch("sqlalchemy.engine.Engine.connect") as engine_mock: cursor_mock = engine_mock.return_value.__enter__.return_value cursor_mock.execute.return_value = [] data_2 = sql_data_node._read_as() @@ -318,13 +318,13 @@ def test_write(self, data, written_data, called_func, pandas_properties): custom_properties.pop("db_extra_args") dn = SQLTableDataNode("foo", Scope.SCENARIO, properties=custom_properties) - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock, mock.patch( + with patch("sqlalchemy.engine.Engine.connect") as engine_mock, patch( "src.taipy.core.data.sql_table.SQLTableDataNode._create_table" ) as create_table_mock: cursor_mock = engine_mock.return_value.__enter__.return_value cursor_mock.execute.side_effect = None - with mock.patch(f"src.taipy.core.data.sql_table.SQLTableDataNode.{called_func}") as mck: + with patch(f"src.taipy.core.data.sql_table.SQLTableDataNode.{called_func}") as mck: dn.write(data) mck.assert_called_once_with(written_data, create_table_mock.return_value, cursor_mock) @@ -345,13 +345,13 @@ def test_write_dataframe(self, pandas_properties, modin_properties): dn = SQLTableDataNode("foo", Scope.SCENARIO, properties=custom_properties) df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock, mock.patch( + with patch("sqlalchemy.engine.Engine.connect") as engine_mock, patch( "src.taipy.core.data.sql_table.SQLTableDataNode._create_table" ): cursor_mock = engine_mock.return_value.__enter__.return_value cursor_mock.execute.side_effect = None - with mock.patch("src.taipy.core.data.sql_table.SQLTableDataNode._insert_dataframe") as mck: + with patch("src.taipy.core.data.sql_table.SQLTableDataNode._insert_dataframe") as mck: dn.write(df) assert mck.call_args[0][0].equals(df) @@ -361,13 +361,13 @@ def test_write_dataframe(self, pandas_properties, modin_properties): dn = SQLTableDataNode("foo", Scope.SCENARIO, properties=custom_properties) df = modin_pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock, mock.patch( + with patch("sqlalchemy.engine.Engine.connect") as engine_mock, patch( 
"src.taipy.core.data.sql_table.SQLTableDataNode._create_table" ): cursor_mock = engine_mock.return_value.__enter__.return_value cursor_mock.execute.side_effect = None - with mock.patch("src.taipy.core.data.sql_table.SQLTableDataNode._insert_dataframe") as mck: + with patch("src.taipy.core.data.sql_table.SQLTableDataNode._insert_dataframe") as mck: dn.write(df) assert mck.call_args[0][0].equals(df) @@ -384,18 +384,18 @@ def test_write_empty_list(self, data, pandas_properties): custom_properties.pop("db_extra_args") dn = SQLTableDataNode("foo", Scope.SCENARIO, properties=custom_properties) - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock, mock.patch( + with patch("sqlalchemy.engine.Engine.connect") as engine_mock, patch( "src.taipy.core.data.sql_table.SQLTableDataNode._create_table" ) as create_table_mock: cursor_mock = engine_mock.return_value.__enter__.return_value cursor_mock.execute.side_effect = None - with mock.patch("src.taipy.core.data.sql_table.SQLTableDataNode._delete_all_rows") as mck: + with patch("src.taipy.core.data.sql_table.SQLTableDataNode._delete_all_rows") as mck: dn.write(data) mck.assert_called_once_with(create_table_mock.return_value, cursor_mock) @pytest.mark.parametrize("pandas_properties", __pandas_properties) - @mock.patch("pandas.read_sql_query") + @patch("pandas.read_sql_query") def test_engine_cache(self, _, pandas_properties): dn = SQLTableDataNode( "foo", @@ -405,7 +405,7 @@ def test_engine_cache(self, _, pandas_properties): assert dn._engine is None - with mock.patch("sqlalchemy.engine.Engine.connect") as engine_mock, mock.patch( + with patch("sqlalchemy.engine.Engine.connect") as engine_mock, patch( "src.taipy.core.data.sql_table.SQLTableDataNode._create_table" ): cursor_mock = engine_mock.return_value.__enter__.return_value @@ -435,7 +435,7 @@ def test_sqlite_read_file_with_different_extension(self, tmp_sqlite_path, reques folder_path, db_name, file_extension = tmp_sqlite_path properties = { "db_engine": "sqlite", - "table_name": "foo", + "table_name": "example", "db_name": db_name, "sqlite_folder_path": folder_path, "sqlite_file_extension": file_extension, @@ -450,7 +450,7 @@ def test_filter_pandas_exposed_type(self, tmp_sqlite_sqlite3_file_path): folder_path, db_name, file_extension = tmp_sqlite_sqlite3_file_path properties = { "db_engine": "sqlite", - "table_name": "foo", + "table_name": "example", "db_name": db_name, "sqlite_folder_path": folder_path, "sqlite_file_extension": file_extension, @@ -462,34 +462,29 @@ def test_filter_pandas_exposed_type(self, tmp_sqlite_sqlite3_file_path): [ {"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}, - {"foo": 1}, + {"foo": 1, "bar": 3}, + {"foo": 2, "bar": 1}, {"foo": 2, "bar": 2}, - {"bar": 2}, + {"foo": 2, "bar": 3}, ] ) ) # Test datanode indexing and slicing - assert dn["foo"].equals(pd.Series([1, 1, 1, 2, None])) - assert dn["bar"].equals(pd.Series([1, 2, None, 2, 2])) - assert dn[:2].equals(pd.DataFrame([{"foo": 1.0, "bar": 1.0}, {"foo": 1.0, "bar": 2.0}])) + assert dn["foo"].equals(pd.Series([1, 1, 1, 2, 2, 2])) + assert dn["bar"].equals(pd.Series([1, 2, 3, 1, 2, 3])) + assert dn[:2].equals(pd.DataFrame([{"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}])) # Test filter data filtered_by_filter_method = dn.filter(("foo", 1, Operator.EQUAL)) filtered_by_indexing = dn[dn["foo"] == 1] - expected_data = pd.DataFrame([{"foo": 1.0, "bar": 1.0}, {"foo": 1.0, "bar": 2.0}, {"foo": 1.0}]) + expected_data = pd.DataFrame([{"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}, {"foo": 1, "bar": 3}]) 
assert_frame_equal(filtered_by_filter_method.reset_index(drop=True), expected_data)
         assert_frame_equal(filtered_by_indexing.reset_index(drop=True), expected_data)
 
         filtered_by_filter_method = dn.filter(("foo", 1, Operator.NOT_EQUAL))
         filtered_by_indexing = dn[dn["foo"] != 1]
-        expected_data = pd.DataFrame([{"foo": 2.0, "bar": 2.0}, {"bar": 2.0}])
-        assert_frame_equal(filtered_by_filter_method.reset_index(drop=True), expected_data)
-        assert_frame_equal(filtered_by_indexing.reset_index(drop=True), expected_data)
-
-        filtered_by_filter_method = dn.filter(("bar", 2, Operator.EQUAL))
-        filtered_by_indexing = dn[dn["bar"] == 2]
-        expected_data = pd.DataFrame([{"foo": 1.0, "bar": 2.0}, {"foo": 2.0, "bar": 2.0}, {"bar": 2.0}])
+        expected_data = pd.DataFrame([{"foo": 2, "bar": 1}, {"foo": 2, "bar": 2}, {"foo": 2, "bar": 3}])
         assert_frame_equal(filtered_by_filter_method.reset_index(drop=True), expected_data)
         assert_frame_equal(filtered_by_indexing.reset_index(drop=True), expected_data)
@@ -497,10 +492,10 @@ def test_filter_pandas_exposed_type(self, tmp_sqlite_sqlite3_file_path):
         filtered_by_indexing = dn[(dn["bar"] == 1) | (dn["bar"] == 2)]
         expected_data = pd.DataFrame(
             [
-                {"foo": 1.0, "bar": 1.0},
-                {"foo": 1.0, "bar": 2.0},
-                {"foo": 2.0, "bar": 2.0},
-                {"bar": 2.0},
+                {"foo": 1, "bar": 1},
+                {"foo": 1, "bar": 2},
+                {"foo": 2, "bar": 1},
+                {"foo": 2, "bar": 2},
             ]
         )
         assert_frame_equal(filtered_by_filter_method.reset_index(drop=True), expected_data)
@@ -510,7 +505,7 @@ def test_filter_modin_exposed_type(self, tmp_sqlite_sqlite3_file_path):
         folder_path, db_name, file_extension = tmp_sqlite_sqlite3_file_path
         properties = {
             "db_engine": "sqlite",
-            "table_name": "foo",
+            "table_name": "example",
             "db_name": db_name,
             "sqlite_folder_path": folder_path,
             "sqlite_file_extension": file_extension,
@@ -522,34 +517,29 @@ def test_filter_modin_exposed_type(self, tmp_sqlite_sqlite3_file_path):
             [
                 {"foo": 1, "bar": 1},
                 {"foo": 1, "bar": 2},
-                {"foo": 1},
+                {"foo": 1, "bar": 3},
+                {"foo": 2, "bar": 1},
                 {"foo": 2, "bar": 2},
-                {"bar": 2},
+                {"foo": 2, "bar": 3},
             ]
         )
     )
 
         # Test datanode indexing and slicing
-        assert dn["foo"].equals(modin_pd.Series([1, 1, 1, 2, None]))
-        assert dn["bar"].equals(modin_pd.Series([1, 2, None, 2, 2]))
-        assert dn[:2].equals(modin_pd.DataFrame([{"foo": 1.0, "bar": 1.0}, {"foo": 1.0, "bar": 2.0}]))
+        assert dn["foo"].equals(modin_pd.Series([1, 1, 1, 2, 2, 2]))
+        assert dn["bar"].equals(modin_pd.Series([1, 2, 3, 1, 2, 3]))
+        assert dn[:2].equals(modin_pd.DataFrame([{"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}]))
 
         # Test filter data
         filtered_by_filter_method = dn.filter(("foo", 1, Operator.EQUAL))
         filtered_by_indexing = dn[dn["foo"] == 1]
-        expected_data = modin_pd.DataFrame([{"foo": 1.0, "bar": 1.0}, {"foo": 1.0, "bar": 2.0}, {"foo": 1.0}])
+        expected_data = modin_pd.DataFrame([{"foo": 1, "bar": 1}, {"foo": 1, "bar": 2}, {"foo": 1, "bar": 3}])
         df_equals(filtered_by_filter_method.reset_index(drop=True), expected_data)
         df_equals(filtered_by_indexing.reset_index(drop=True), expected_data)
 
         filtered_by_filter_method = dn.filter(("foo", 1, Operator.NOT_EQUAL))
         filtered_by_indexing = dn[dn["foo"] != 1]
-        expected_data = modin_pd.DataFrame([{"foo": 2.0, "bar": 2.0}, {"bar": 2.0}])
-        df_equals(filtered_by_filter_method.reset_index(drop=True), expected_data)
-        df_equals(filtered_by_indexing.reset_index(drop=True), expected_data)
-
-        filtered_by_filter_method = dn.filter(("bar", 2, Operator.EQUAL))
-        filtered_by_indexing = dn[dn["bar"] == 2]
-        expected_data = modin_pd.DataFrame([{"foo": 1.0, "bar": 2.0}, {"foo": 2.0, "bar": 2.0}, {"bar": 2.0}])
+        expected_data = modin_pd.DataFrame([{"foo": 2, "bar": 1}, {"foo": 2, "bar": 2}, {"foo": 2, "bar": 3}])
         df_equals(filtered_by_filter_method.reset_index(drop=True), expected_data)
         df_equals(filtered_by_indexing.reset_index(drop=True), expected_data)
@@ -557,10 +547,10 @@ def test_filter_modin_exposed_type(self, tmp_sqlite_sqlite3_file_path):
         filtered_by_indexing = dn[(dn["bar"] == 1) | (dn["bar"] == 2)]
         expected_data = modin_pd.DataFrame(
             [
-                {"foo": 1.0, "bar": 1.0},
-                {"foo": 1.0, "bar": 2.0},
-                {"foo": 2.0, "bar": 2.0},
-                {"bar": 2.0},
+                {"foo": 1, "bar": 1},
+                {"foo": 1, "bar": 2},
+                {"foo": 2, "bar": 1},
+                {"foo": 2, "bar": 2},
             ]
         )
         df_equals(filtered_by_filter_method.reset_index(drop=True), expected_data)
@@ -570,7 +560,7 @@ def test_filter_numpy_exposed_type(self, tmp_sqlite_sqlite3_file_path):
         folder_path, db_name, file_extension = tmp_sqlite_sqlite3_file_path
         properties = {
             "db_engine": "sqlite",
-            "table_name": "foo",
+            "table_name": "example",
             "db_name": db_name,
             "sqlite_folder_path": folder_path,
             "sqlite_file_extension": file_extension,
@@ -598,17 +588,37 @@ def test_filter_numpy_exposed_type(self, tmp_sqlite_sqlite3_file_path):
         assert np.array_equal(dn[1:4, :1], np.array([[1], [1], [2]]))
 
         # Test filter data
-        assert np.array_equal(dn.filter((0, 1, Operator.EQUAL)), np.array([[1, 1], [1, 2], [1, 3]]))
+        assert np.array_equal(dn.filter(("foo", 1, Operator.EQUAL)), np.array([[1, 1], [1, 2], [1, 3]]))
         assert np.array_equal(dn[dn[:, 0] == 1], np.array([[1, 1], [1, 2], [1, 3]]))
-        assert np.array_equal(dn.filter((0, 1, Operator.NOT_EQUAL)), np.array([[2, 1], [2, 2], [2, 3]]))
+        assert np.array_equal(dn.filter(("foo", 1, Operator.NOT_EQUAL)), np.array([[2, 1], [2, 2], [2, 3]]))
         assert np.array_equal(dn[dn[:, 0] != 1], np.array([[2, 1], [2, 2], [2, 3]]))
-        assert np.array_equal(dn.filter((1, 2, Operator.EQUAL)), np.array([[1, 2], [2, 2]]))
+        assert np.array_equal(dn.filter(("bar", 2, Operator.EQUAL)), np.array([[1, 2], [2, 2]]))
         assert np.array_equal(dn[dn[:, 1] == 2], np.array([[1, 2], [2, 2]]))
         assert np.array_equal(
-            dn.filter([(1, 1, Operator.EQUAL), (1, 2, Operator.EQUAL)], JoinOperator.OR),
+            dn.filter([("bar", 1, Operator.EQUAL), ("bar", 2, Operator.EQUAL)], JoinOperator.OR),
             np.array([[1, 1], [1, 2], [2, 1], [2, 2]]),
         )
         assert np.array_equal(dn[(dn[:, 1] == 1) | (dn[:, 1] == 2)], np.array([[1, 1], [1, 2], [2, 1], [2, 2]]))
+
+    def test_filter_does_not_read_all_entities(self, tmp_sqlite_sqlite3_file_path):
+        folder_path, db_name, file_extension = tmp_sqlite_sqlite3_file_path
+        properties = {
+            "db_engine": "sqlite",
+            "table_name": "example",
+            "db_name": db_name,
+            "sqlite_folder_path": folder_path,
+            "sqlite_file_extension": file_extension,
+            "exposed_type": "numpy",
+        }
+        dn = SQLTableDataNode("foo", Scope.SCENARIO, properties=properties)
+
+        # SQLTableDataNode.filter() should not call the SQLTableDataNode._read() method
+        with patch.object(SQLTableDataNode, "_read") as read_mock:
+            dn.filter(("foo", 1, Operator.EQUAL))
+            dn.filter(("bar", 2, Operator.NOT_EQUAL))
+            dn.filter([("bar", 1, Operator.EQUAL), ("bar", 2, Operator.EQUAL)], JoinOperator.OR)
+
+        read_mock.assert_not_called()