deepset-ai · ZanSara · Dec 21, 2023 · Dec 21, 2023 · Dec 21, 2023 · Dec 21, 2023
@@ -5,7 +5,7 @@
 
 from haystack import Document, component
 from haystack.dataclasses import ByteStream
-from haystack.components.converters.utils import get_bytestream_from_source
+from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata
 
 logger = logging.getLogger(__name__)
 
@@ -55,22 +55,21 @@ def run(self, sources: List[Union[str, Path, ByteStream]], meta: Optional[List[D
         Converts a list of HTML files to Documents.
 
         :param sources: List of HTML file paths or ByteStream objects.
-        :param meta: Optional list of metadata to attach to the Documents.
-          The length of the list must match the number of sources. Defaults to `None`.
+        :param meta: Optional metadata to attach to the Documents.
+          This value can be either a list of dictionaries or a single dictionary.
+          If it's a single dictionary, its content is added to the metadata of all produced Documents.
+          If it's a list, its length must match the number of sources, because the two lists are zipped together.
+          Defaults to `None`.
         :return: A dictionary containing a list of Document objects under the 'documents' key.
         """
 
         documents = []
-
-        if meta is None:
-            meta = [{}] * len(sources)
-        elif len(sources) != len(meta):
-            raise ValueError("The length of the metadata list must match the number of sources.")
+        meta_list = normalize_metadata(meta=meta, sources_count=len(sources))
 
         extractor_class = getattr(extractors, self.extractor_type)
         extractor = extractor_class(raise_on_failure=False)
 
-        for source, metadata in zip(sources, meta):
+        for source, metadata in zip(sources, meta_list):
             try:
                 bytestream = get_bytestream_from_source(source=source)
             except Exception as e:

@@ -0,0 +1,4 @@
+---
+enhancements:
+  - |
+    Adds support for passing a single metadata dictionary to `HTMLToDocument`; its content is added to the metadata of every produced Document.
@@ -13,10 +13,11 @@ def test_run(self, test_files_path):
         """
         sources = [test_files_path / "html" / "what_is_haystack.html"]
         converter = HTMLToDocument()
-        results = converter.run(sources=sources)
+        results = converter.run(sources=sources, meta={"test": "TEST"})
         docs = results["documents"]
         assert len(docs) == 1
         assert "Haystack" in docs[0].content
+        assert docs[0].meta["test"] == "TEST"
 
     def test_run_different_extractors(self, test_files_path):
         """