This repository has been archived by the owner on Jan 2, 2024. It is now read-only.

Feature/#755 - Open append() method on datanodes #824

Merged
6 commits merged on Nov 15, 2023
3 changes: 3 additions & 0 deletions src/taipy/core/data/parquet.py
@@ -227,6 +227,9 @@ def _read_as_pandas_dataframe(self, read_kwargs: Dict) -> pd.DataFrame:
def _read_as_modin_dataframe(self, read_kwargs: Dict) -> modin_pd.DataFrame:
return modin_pd.read_parquet(self._path, **read_kwargs)

def _append(self, data: Any):
self.write_with_kwargs(data, engine="fastparquet", append=True)

def _write(self, data: Any):
self.write_with_kwargs(data)

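For reviewers who want to try the change end-to-end, here is a minimal usage sketch of the new append() method. It assumes fastparquet is installed; the "sales_history" config id and the file path are placeholders, and the import paths are inferred from the src/taipy/core layout in this PR rather than prescribed by it:

import pandas as pd

from taipy.config.common.scope import Scope
from taipy.core.data.parquet import ParquetDataNode

# Placeholder config id and path, used only for this sketch.
dn = ParquetDataNode("sales_history", Scope.SCENARIO, properties={"path": "/tmp/sales_history.parquet"})

# write() replaces the file contents; the new append() adds rows to the existing file.
dn.write(pd.DataFrame([{"a": 1, "b": 2, "c": 3}]))
dn.append(pd.DataFrame([{"a": 4, "b": 5, "c": 6}]))

# read() now returns both rows, since _append delegates to
# write_with_kwargs(data, engine="fastparquet", append=True).
print(dn.read())
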
44 changes: 44 additions & 0 deletions tests/core/data/test_parquet_data_node.py
@@ -300,6 +300,50 @@ def test_get_system_folder_modified_date_instead_of_last_edit_date(self, tmpdir_

os.unlink(temp_file_path)

@pytest.mark.parametrize(
"content",
[
([{"a": 11, "b": 22, "c": 33}, {"a": 44, "b": 55, "c": 66}]),
(pd.DataFrame([{"a": 11, "b": 22, "c": 33}, {"a": 44, "b": 55, "c": 66}])),
],
)
def test_append_pandas(self, parquet_file_path, default_data_frame, content):
# !!! appending data only works with the `fastparquet` engine
if not util.find_spec("fastparquet"):
    pytest.skip("fastparquet is not installed")

dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": parquet_file_path})
assert_frame_equal(dn.read(), default_data_frame)

dn.append(content)
assert_frame_equal(
dn.read(),
pd.concat([default_data_frame, pd.DataFrame(content, columns=["a", "b", "c"])]).reset_index(drop=True),
)

@pytest.mark.parametrize(
"content",
[
([{"a": 11, "b": 22, "c": 33}, {"a": 44, "b": 55, "c": 66}]),
(pd.DataFrame([{"a": 11, "b": 22, "c": 33}, {"a": 44, "b": 55, "c": 66}])),
],
)
def test_append_modin(self, parquet_file_path, default_data_frame, content):
# !!! appending data only works with the `fastparquet` engine
if not util.find_spec("fastparquet"):
    pytest.skip("fastparquet is not installed")

dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": parquet_file_path, "exposed_type": "modin"})
df_equals(dn.read(), modin_pd.DataFrame(default_data_frame))

dn.append(content)
df_equals(
dn.read(),
modin_pd.concat([default_data_frame, pd.DataFrame(content, columns=["a", "b", "c"])]).reset_index(
drop=True
),
)

@pytest.mark.parametrize(
"data",
[