From e8a2280119ee53f21930c50974810283802e594c Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 31 Jan 2024 12:37:14 +0100 Subject: [PATCH 1/5] add params --- haystack/dataclasses/byte_stream.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/haystack/dataclasses/byte_stream.py b/haystack/dataclasses/byte_stream.py index 6ccf324640..07bf596972 100644 --- a/haystack/dataclasses/byte_stream.py +++ b/haystack/dataclasses/byte_stream.py @@ -14,25 +14,38 @@ class ByteStream: mime_type: Optional[str] = field(default=None) def to_file(self, destination_path: Path): + """ + Write the ByteStream to a file. Note: the metadata will be lost. + + :param destination_path: The path to write the ByteStream to. + """ with open(destination_path, "wb") as fd: fd.write(self.data) @classmethod - def from_file_path(cls, filepath: Path, mime_type: Optional[str] = None) -> "ByteStream": + def from_file_path( + cls, filepath: Path, mime_type: Optional[str] = None, meta: Optional[Dict[str, Any]] = None + ) -> "ByteStream": """ Create a ByteStream from the contents read from a file. :param filepath: A valid path to a file. + :param mime_type: The mime type of the file. + :param meta: Additional metadata to be stored with the ByteStream. """ with open(filepath, "rb") as fd: - return cls(data=fd.read(), mime_type=mime_type) + return cls(data=fd.read(), mime_type=mime_type, meta=meta) @classmethod - def from_string(cls, text: str, encoding: str = "utf-8", mime_type: Optional[str] = None) -> "ByteStream": + def from_string( + cls, text: str, encoding: str = "utf-8", mime_type: Optional[str] = None, meta: Optional[Dict[str, Any]] = None + ) -> "ByteStream": """ Create a ByteStream encoding a string. :param text: The string to encode :param encoding: The encoding used to convert the string into bytes + :param mime_type: The mime type of the file. + :param meta: Additional metadata to be stored with the ByteStream. """ - return cls(data=text.encode(encoding), mime_type=mime_type) + return cls(data=text.encode(encoding), mime_type=mime_type, meta=meta) From b226b8a56bcacde91ff15c73efeaefaee668991e Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 31 Jan 2024 12:38:24 +0100 Subject: [PATCH 2/5] add tests --- test/dataclasses/test_byte_stream.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/dataclasses/test_byte_stream.py b/test/dataclasses/test_byte_stream.py index 62aa1add6d..57d444b038 100644 --- a/test/dataclasses/test_byte_stream.py +++ b/test/dataclasses/test_byte_stream.py @@ -1,9 +1,5 @@ -import io - from haystack.dataclasses import ByteStream -import pytest - def test_from_file_path(tmp_path, request): test_bytes = "Hello, world!\n".encode() @@ -19,6 +15,10 @@ def test_from_file_path(tmp_path, request): assert b.data == test_bytes assert b.mime_type == "text/plain" + b = ByteStream.from_file_path(test_path, meta={"foo": "bar"}) + assert b.data == test_bytes + assert b.meta == {"foo": "bar"} + def test_from_string(): test_string = "Hello, world!" @@ -30,6 +30,10 @@ def test_from_string(): assert b.data.decode() == test_string assert b.mime_type == "text/plain" + b = ByteStream.from_string(test_string, meta={"foo": "bar"}) + assert b.data.decode() == test_string + assert b.meta == {"foo": "bar"} + def test_to_file(tmp_path, request): test_str = "Hello, world!\n" From 5ec2ac2f42b98822b1998050bb4ce10c633e83ab Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 31 Jan 2024 12:41:56 +0100 Subject: [PATCH 3/5] reno --- releasenotes/notes/meta-in-bytestream-a29816c919c0be5a.yaml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 releasenotes/notes/meta-in-bytestream-a29816c919c0be5a.yaml diff --git a/releasenotes/notes/meta-in-bytestream-a29816c919c0be5a.yaml b/releasenotes/notes/meta-in-bytestream-a29816c919c0be5a.yaml new file mode 100644 index 0000000000..e116c73fbc --- /dev/null +++ b/releasenotes/notes/meta-in-bytestream-a29816c919c0be5a.yaml @@ -0,0 +1,3 @@ +--- +enhancements: + - Add meta parameter to `ByteStream.from_file_path()` and `ByteStream.from_string()`. From 58cb60f7173fbb93a220dc2640ae3f25910ab1ae Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 31 Jan 2024 12:59:36 +0100 Subject: [PATCH 4/5] add default --- haystack/dataclasses/byte_stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/haystack/dataclasses/byte_stream.py b/haystack/dataclasses/byte_stream.py index 07bf596972..51542f3746 100644 --- a/haystack/dataclasses/byte_stream.py +++ b/haystack/dataclasses/byte_stream.py @@ -34,7 +34,7 @@ def from_file_path( :param meta: Additional metadata to be stored with the ByteStream. """ with open(filepath, "rb") as fd: - return cls(data=fd.read(), mime_type=mime_type, meta=meta) + return cls(data=fd.read(), mime_type=mime_type, meta=meta or {}) @classmethod def from_string( @@ -48,4 +48,4 @@ def from_string( :param mime_type: The mime type of the file. :param meta: Additional metadata to be stored with the ByteStream. """ - return cls(data=text.encode(encoding), mime_type=mime_type, meta=meta) + return cls(data=text.encode(encoding), mime_type=mime_type, meta=meta or {}) From 3f52238c34333b36387f0de52fe039b13df2f307 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 1 Feb 2024 12:24:31 +0100 Subject: [PATCH 5/5] defreeze --- haystack/dataclasses/byte_stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/dataclasses/byte_stream.py b/haystack/dataclasses/byte_stream.py index 51542f3746..80b1c50c3b 100644 --- a/haystack/dataclasses/byte_stream.py +++ b/haystack/dataclasses/byte_stream.py @@ -3,7 +3,7 @@ from typing import Optional, Dict, Any -@dataclass(frozen=True) +@dataclass class ByteStream: """ Base data class representing a binary object in the Haystack API.