From 79d67b0338cfea7155c196cf4053f6e35ec0b56b Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Thu, 11 Jan 2024 12:04:25 +0100
Subject: [PATCH] expand example to use bytestream (#6718)

---
Review notes (text in this position is ignored by `git am` and is not part
of the commit message):

* What the change does: instead of handing raw directory entries to
  `file_type_router`, the example now builds one `ByteStream` per regular
  file in the current directory, attaches `path` and `position` metadata to
  it, and passes that list as `sources`.
* `import os` is added, but nothing in this diff uses it.
* `source.meta["path"]` receives the `pathlib.Path` object itself, not a
  string. NOTE(review): if the metadata has to be serialized later,
  `str(path)` may be needed - confirm against the document store in use.
* `position` comes from `enumerate()` over every directory entry, before the
  `is_file()` filter, so the stored positions skip over sub-directories.
  NOTE(review): confirm that gaps in the numbering are intended.
* The `list(...)` wrapped around `Path(".").iterdir()` is not needed just to
  feed `enumerate()`.

 examples/pipelines/indexing_pipeline_with_meta.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/examples/pipelines/indexing_pipeline_with_meta.py b/examples/pipelines/indexing_pipeline_with_meta.py
index de5cbe4882..1ff6130c97 100644
--- a/examples/pipelines/indexing_pipeline_with_meta.py
+++ b/examples/pipelines/indexing_pipeline_with_meta.py
@@ -1,8 +1,10 @@
 from typing import Dict, Any
 from pathlib import Path
 from datetime import datetime
+import os
 
 from haystack import Pipeline
+from haystack.dataclasses import ByteStream
 from haystack.components.others import Multiplexer
 from haystack.components.converters import PyPDFToDocument, TextFileToDocument
 from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
@@ -34,9 +36,20 @@
 p.connect("cleaner.documents", "splitter.documents")
 p.connect("splitter.documents", "writer.documents")
 
+# Add metadata to your files by using ByteStream
+sources = []
+for position, path in enumerate(list(Path(".").iterdir())):
+    if path.is_file():
+        # Create the ByteStream
+        source = ByteStream.from_file_path(path)
+        # Add the metadata
+        source.meta["path"] = path
+        source.meta["position"] = position
+        sources.append(source)
+
 result = p.run(
     {
-        "file_type_router": {"sources": list(Path(".").iterdir())},
+        "file_type_router": {"sources": sources},
         "metadata_multiplexer": {"value": {"date_added": datetime.now().isoformat()}},
     }
 )