Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

swarm - fixed errors in texblob and file path to base 64 #737

Merged
merged 1 commit into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions pkgs/swarmauri/swarmauri/parsers/concrete/TextBlobNounParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,17 @@ class TextBlobNounParser(ParserBase):
type: Literal["TextBlobNounParser"] = "TextBlobNounParser"

def __init__(self, **kwargs):
import nltk

nltk.download("punkt_tab")
super().__init__(**kwargs)
try:
import nltk

# Download required NLTK data
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")
nltk.download("brown")
nltk.download("wordnet")
super().__init__(**kwargs)
except Exception as e:
raise RuntimeError(f"Failed to initialize NLTK resources: {str(e)}")

def parse(self, data: Union[str, Any]) -> List[Document]:
"""
Expand All @@ -35,15 +42,16 @@ def parse(self, data: Union[str, Any]) -> List[Document]:
if not isinstance(data, str):
raise ValueError("TextBlobParser expects a string as input data.")

# Use TextBlob for NLP tasks
blob = TextBlob(data)
try:
# Use TextBlob for NLP tasks
blob = TextBlob(data)

# Extracts noun phrases to demonstrate one of TextBlob's capabilities.
# In practice, this parser could be expanded to include more sophisticated processing.
noun_phrases = list(blob.noun_phrases)
# Extracts noun phrases to demonstrate one of TextBlob's capabilities.
noun_phrases = list(blob.noun_phrases)

# Example: Wrap the extracted noun phrases into an IDocument instance
# In real scenarios, you might want to include more details, like sentiment, POS tags, etc.
document = Document(content=data, metadata={"noun_phrases": noun_phrases})
# Create document with extracted information
document = Document(content=data, metadata={"noun_phrases": noun_phrases})

return [document]
return [document]
except Exception as e:
raise RuntimeError(f"Error during text parsing: {str(e)}")
45 changes: 32 additions & 13 deletions pkgs/swarmauri/tests/unit/parsers/TextBlobNounParser_unit_test.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,43 @@
import pytest
from swarmauri.parsers.concrete.TextBlobNounParser import TextBlobNounParser as Parser


def setup_module(module):
"""Setup any state specific to the execution of the given module."""
try:
# Initialize a parser to trigger NLTK downloads
Parser()
except Exception as e:
pytest.skip(f"Failed to initialize NLTK resources: {str(e)}")


@pytest.fixture(scope="module")
def parser():
"""Fixture to provide a parser instance for tests."""
return Parser()


@pytest.mark.unit
def test_ubc_resource():
parser = Parser()
assert parser.resource == 'Parser'
def test_ubc_resource(parser):
assert parser.resource == "Parser"


@pytest.mark.unit
def test_ubc_type():
parser = Parser()
assert parser.type == 'TextBlobNounParser'
def test_ubc_type(parser):
assert parser.type == "TextBlobNounParser"


@pytest.mark.unit
def test_serialization():
parser = Parser()
def test_serialization(parser):
assert parser.id == Parser.model_validate_json(parser.model_dump_json()).id


@pytest.mark.unit
def test_parse():
documents = Parser().parse('One more large chapula please.')
assert documents[0].resource == 'Document'
assert documents[0].content == 'One more large chapula please.'
assert documents[0].metadata['noun_phrases'] == ['large chapula']
def test_parse(parser):
try:
documents = parser.parse("One more large chapula please.")
assert documents[0].resource == "Document"
assert documents[0].content == "One more large chapula please."
assert documents[0].metadata["noun_phrases"] == ["large chapula"]
except Exception as e:
pytest.fail(f"Parser failed with error: {str(e)}")
7 changes: 6 additions & 1 deletion pkgs/swarmauri/tests/unit/utils/file_path_to_base64_test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import pytest
import base64
from swarmauri.utils.file_path_to_base64 import file_path_to_base64
import os
from pathlib import Path

test_image_path = "pkgs/swarmauri/tests/static/cityscape.png"
# Get the current working directory
root_dir = Path(__file__).resolve().parents[2]

test_image_path = os.path.join(root_dir, "static", "cityscape.png")


def test_file_path_to_base64():
Expand Down