Skip to content

Commit

Permalink
expand example to use bytestream (#6718)
Browse files Browse the repository at this point in the history
  • Loading branch information
ZanSara authored Jan 11, 2024
1 parent e1ec4e5 commit 79d67b0
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion examples/pipelines/indexing_pipeline_with_meta.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import Dict, Any
from pathlib import Path
from datetime import datetime
import os

from haystack import Pipeline
from haystack.dataclasses import ByteStream
from haystack.components.others import Multiplexer
from haystack.components.converters import PyPDFToDocument, TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
Expand Down Expand Up @@ -34,9 +36,20 @@
# Chain the final stages: the cleaner's documents feed the splitter, and the
# splitter's documents feed the writer.
p.connect("cleaner.documents", "splitter.documents")
p.connect("splitter.documents", "writer.documents")

# Add metadata to your files by wrapping each one in a ByteStream before
# handing it to the pipeline.
sources = []
# `position` is the entry's index in the directory listing. Directories are
# skipped below, so the positions of the collected files may have gaps.
for position, path in enumerate(Path(".").iterdir()):
    if not path.is_file():
        continue
    # Create the ByteStream
    source = ByteStream.from_file_path(path)
    # Add the metadata
    # NOTE(review): the Path object is stored as-is; confirm downstream
    # components accept non-string metadata values, otherwise use str(path).
    source.meta["path"] = path
    source.meta["position"] = position
    sources.append(source)

# Run the pipeline: the router receives the ByteStreams built above, and the
# multiplexer receives a timestamp under the "date_added" key.
result = p.run(
    {
        "file_type_router": {"sources": sources},
        "metadata_multiplexer": {"value": {"date_added": datetime.now().isoformat()}},
    }
)
Expand Down

0 comments on commit 79d67b0

Please sign in to comment.