swarmauri · cobycloud · Oct 29, 2024 · Oct 29, 2024
diff --git a/pkgs/swarmauri/swarmauri/parsers/concrete/TextBlobNounParser.py b/pkgs/swarmauri/swarmauri/parsers/concrete/TextBlobNounParser.py
@@ -15,10 +15,17 @@ class TextBlobNounParser(ParserBase):
     type: Literal["TextBlobNounParser"] = "TextBlobNounParser"
 
     def __init__(self, **kwargs):
-        import nltk
-
-        nltk.download("punkt_tab")
-        super().__init__(**kwargs)
+        try:
+            import nltk
+
+            # Download required NLTK data
+            nltk.download("punkt")
+            nltk.download("averaged_perceptron_tagger")
+            nltk.download("brown")
+            nltk.download("wordnet")
+            super().__init__(**kwargs)
+        except Exception as e:
+            raise RuntimeError(f"Failed to initialize NLTK resources: {str(e)}")
 
     def parse(self, data: Union[str, Any]) -> List[Document]:
         """
@@ -35,15 +42,16 @@ def parse(self, data: Union[str, Any]) -> List[Document]:
         if not isinstance(data, str):
             raise ValueError("TextBlobParser expects a string as input data.")
 
-        # Use TextBlob for NLP tasks
-        blob = TextBlob(data)
+        try:
+            # Use TextBlob for NLP tasks
+            blob = TextBlob(data)
 
-        # Extracts noun phrases to demonstrate one of TextBlob's capabilities.
-        # In practice, this parser could be expanded to include more sophisticated processing.
-        noun_phrases = list(blob.noun_phrases)
+            # Extracts noun phrases to demonstrate one of TextBlob's capabilities.
+            noun_phrases = list(blob.noun_phrases)
 
-        # Example: Wrap the extracted noun phrases into an IDocument instance
-        # In real scenarios, you might want to include more details, like sentiment, POS tags, etc.
-        document = Document(content=data, metadata={"noun_phrases": noun_phrases})
+            # Create document with extracted information
+            document = Document(content=data, metadata={"noun_phrases": noun_phrases})
 
-        return [document]
+            return [document]
+        except Exception as e:
+            raise RuntimeError(f"Error during text parsing: {str(e)}")
diff --git a/pkgs/swarmauri/tests/unit/parsers/TextBlobNounParser_unit_test.py b/pkgs/swarmauri/tests/unit/parsers/TextBlobNounParser_unit_test.py
@@ -1,24 +1,43 @@
 import pytest
 from swarmauri.parsers.concrete.TextBlobNounParser import TextBlobNounParser as Parser
 
+
+def setup_module(module):
+    """Setup any state specific to the execution of the given module."""
+    try:
+        # Initialize a parser to trigger NLTK downloads
+        Parser()
+    except Exception as e:
+        pytest.skip(f"Failed to initialize NLTK resources: {str(e)}")
+
+
+@pytest.fixture(scope="module")
+def parser():
+    """Fixture to provide a parser instance for tests."""
+    return Parser()
+
+
 @pytest.mark.unit
-def test_ubc_resource():
-    parser = Parser()
-    assert parser.resource == 'Parser'
+def test_ubc_resource(parser):
+    assert parser.resource == "Parser"
+
 
 @pytest.mark.unit
-def test_ubc_type():
-    parser = Parser()
-    assert parser.type == 'TextBlobNounParser'
+def test_ubc_type(parser):
+    assert parser.type == "TextBlobNounParser"
+
 
 @pytest.mark.unit
-def test_serialization():
-    parser = Parser()
+def test_serialization(parser):
     assert parser.id == Parser.model_validate_json(parser.model_dump_json()).id
 
+
 @pytest.mark.unit
-def test_parse():
-    documents = Parser().parse('One more large chapula please.')
-    assert documents[0].resource == 'Document'
-    assert documents[0].content == 'One more large chapula please.'
-    assert documents[0].metadata['noun_phrases'] == ['large chapula']
+def test_parse(parser):
+    try:
+        documents = parser.parse("One more large chapula please.")
+        assert documents[0].resource == "Document"
+        assert documents[0].content == "One more large chapula please."
+        assert documents[0].metadata["noun_phrases"] == ["large chapula"]
+    except Exception as e:
+        pytest.fail(f"Parser failed with error: {str(e)}")
diff --git a/pkgs/swarmauri/tests/unit/utils/file_path_to_base64_test.py b/pkgs/swarmauri/tests/unit/utils/file_path_to_base64_test.py
@@ -1,8 +1,13 @@
 import pytest
 import base64
 from swarmauri.utils.file_path_to_base64 import file_path_to_base64
+import os
+from pathlib import Path
 
-test_image_path = "pkgs/swarmauri/tests/static/cityscape.png"
+# Get the current working directory
+root_dir = Path(__file__).resolve().parents[2]
+
+test_image_path = os.path.join(root_dir, "static", "cityscape.png")
 
 
 def test_file_path_to_base64():