Skip to content

Commit

Permalink
community: Add configurable VisualFeatures to the `AzureAiServicesI…
Browse files Browse the repository at this point in the history
…mageAnalysisTool` (#27444)

Thank you for contributing to LangChain!

- [ ] **PR title**: community: Add configurable `VisualFeatures` to the
`AzureAiServicesImageAnalysisTool`


- [ ] **PR message**:  
- **Description:** The `AzureAiServicesImageAnalysisTool` is a good
service and utilises the Azure AI Vision package under the hood.
However, since the creation of this tool, new `VisualFeatures` have been
added to allow the user to request other image specific information to
be returned. Currently, the tool offers neither configuration of which
features should be returned, nor does it offer any newer feature types. The
aim of this PR is to address this and expose more of the Azure Service
in this integration.
- **Dependencies:** no new dependencies in the main class file,
azure.ai.vision.imageanalysis added to extra test dependencies file.


- [ ] **Add tests and docs**: If you're adding a new integration, please
include
1. Although no tests exist for already implemented Azure Service tools,
I've created 3 unit tests for this class that test initialisation and
credentials, local file analysis and a test for the new changes/
features option.


- [ ] **Lint and test**: All linting has passed.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
  • Loading branch information
3 people authored Dec 16, 2024
1 parent 1c120e9 commit 580a8d5
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 13 deletions.
1 change: 1 addition & 0 deletions libs/community/extended_testing_deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ atlassian-python-api>=3.36.0,<4
azure-ai-documentintelligence>=1.0.0b1,<2
azure-identity>=1.15.0,<2
azure-search-documents==11.4.0
azure-ai-vision-imageanalysis>=1.0.0,<2
beautifulsoup4>=4,<5
bibtexparser>=1.4.0,<2
cassio>=0.1.6,<0.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,31 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
"""Tool that queries the Azure AI Services Image Analysis API.
In order to set this up, follow instructions at:
https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40
https://learn.microsoft.com/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40
Attributes:
azure_ai_services_key (Optional[str]): The API key for Azure AI Services.
azure_ai_services_endpoint (Optional[str]): The endpoint URL for Azure AI Services.
visual_features Any: The visual features to analyze in the image, can be set as
either strings or azure.ai.vision.imageanalysis.models.VisualFeatures.
(e.g. 'TAGS', VisualFeatures.CAPTION).
image_analysis_client (Any): The client for interacting
with Azure AI Services Image Analysis.
name (str): The name of the tool.
description (str): A description of the tool,
including its purpose and expected input.
"""

azure_ai_services_key: str = "" #: :meta private:
azure_ai_services_endpoint: str = "" #: :meta private:
image_analysis_client: Any #: :meta private:
visual_features: Any #: :meta private:
azure_ai_services_key: Optional[str] = None #: :meta private:
azure_ai_services_endpoint: Optional[str] = None #: :meta private:
visual_features: Any = None
image_analysis_client: Any = None #: :meta private:

name: str = "azure_ai_services_image_analysis"
description: str = (
"A wrapper around Azure AI Services Image Analysis. "
"Useful for when you need to analyze images. "
"Input should be a url to an image."
"Input must be a url string or path string to an image."
)

@model_validator(mode="before")
Expand Down Expand Up @@ -68,13 +80,16 @@ def validate_environment(cls, values: Dict) -> Any:
f"Initialization of Azure AI Vision Image Analysis client failed: {e}"
)

values["visual_features"] = [
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
]

visual_features = values.get(
"visual_features",
[
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
],
)
values["visual_features"] = visual_features
return values

def _image_analysis(self, image_path: str) -> Dict:
Expand Down Expand Up @@ -115,6 +130,17 @@ def _image_analysis(self, image_path: str) -> Dict:
if result.read is not None and len(result.read.blocks) > 0:
res_dict["text"] = [line.text for line in result.read.blocks[0].lines]

if result.dense_captions is not None and len(result.dense_captions) > 0:
res_dict["dense_captions"] = [
str(dc) for dc in result.dense_captions.list
]

if result.smart_crops is not None and len(result.smart_crops) > 0:
res_dict["smart_crops"] = [str(sc) for sc in result.smart_crops.list]

if result.people is not None and len(result.people) > 0:
res_dict["people"] = [str(p) for p in result.people.list]

return res_dict

def _format_image_analysis_result(self, image_analysis_result: Dict) -> str:
Expand All @@ -136,6 +162,21 @@ def _format_image_analysis_result(self, image_analysis_result: Dict) -> str:
if "text" in image_analysis_result and len(image_analysis_result["text"]) > 0:
formatted_result.append("Text: " + ", ".join(image_analysis_result["text"]))

if "dense_captions" in image_analysis_result:
formatted_result.append(
"Dense Captions: " + ", ".join(image_analysis_result["dense_captions"])
)

if "smart_crops" in image_analysis_result:
formatted_result.append(
"Smart Crops: " + ", ".join(image_analysis_result["smart_crops"])
)

if "people" in image_analysis_result:
formatted_result.append(
"People: " + ", ".join(image_analysis_result["people"])
)

return "\n".join(formatted_result)

def _run(
Expand Down
Binary file added libs/community/tests/examples/building.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Tests for the Azure AI Services Image Analysis Tool."""

from pathlib import Path
from typing import Any

import pytest

from langchain_community.tools.azure_ai_services.image_analysis import (
AzureAiServicesImageAnalysisTool,
)

# Repository root for the community tests (three levels above this file).
this_dir = Path(__file__).parents[3]

# Directory of example fixtures; `building.jpg` is the sample image that the
# local-file analysis tests below pass to the tool.
examples_dir = this_dir / "examples"
building_path = examples_dir / "building.jpg"


@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_content_safety(mocker: Any) -> None:
    """Constructing the tool stores the supplied credentials on the instance.

    NOTE(review): the name looks copy-pasted from a content-safety test — it
    actually exercises ``AzureAiServicesImageAnalysisTool`` initialisation.
    """
    # Stub the Azure SDK so no real client or credential object is created.
    for target in (
        "azure.ai.vision.imageanalysis.ImageAnalysisClient",
        "azure.core.credentials.AzureKeyCredential",
    ):
        mocker.patch(target, autospec=True)

    api_key, api_endpoint = "key", "endpoint"

    tool = AzureAiServicesImageAnalysisTool(
        azure_ai_services_key=api_key, azure_ai_services_endpoint=api_endpoint
    )

    assert tool.azure_ai_services_key == api_key
    assert tool.azure_ai_services_endpoint == api_endpoint


@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_analysis(mocker: Any) -> None:
    """Analyzing a local image with only CAPTION yields a single caption line.

    Every visual-feature attribute other than ``caption`` is set to ``None``
    on the mocked ``analyze()`` result, so the formatted output must contain
    exactly one "Caption:" line.
    """
    key = "key"
    endpoint = "endpoint"

    # Stub the Azure SDK client/credential, and force "local" file detection
    # so the tool reads bytes from disk rather than treating input as a URL.
    mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
    mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
    mocker.patch(
        "langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
        return_value="local",
    )

    tool = AzureAiServicesImageAnalysisTool(
        azure_ai_services_key=key,
        azure_ai_services_endpoint=endpoint,
        visual_features=["CAPTION"],
    )

    mock_content_client = mocker.Mock()
    mock_content_client.analyze.return_value = mocker.Mock()
    mock_content_client.analyze.return_value.caption.text = "A building corner."

    # All other features are absent so only the caption appears in the output.
    for feature in (
        "objects",
        "tags",
        "read",
        "dense_captions",
        "smart_crops",
        "people",
    ):
        setattr(mock_content_client.analyze.return_value, feature, None)

    tool.image_analysis_client = mock_content_client

    # Renamed from `input` — don't shadow the builtin.
    image_path = str(building_path)
    expected = "Caption: A building corner."

    assert tool._run(image_path) == expected


@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_different_features(mocker: Any) -> None:
    """Caption, smart-crops, and people results all appear in the output.

    Exercises the configurable ``visual_features`` option: the mocked
    ``analyze()`` result carries smart-crop and people entries in addition to
    a caption, and each must be rendered on its own line of the formatted
    result string.
    """
    key = "key"
    endpoint = "endpoint"

    # Stub the Azure SDK client/credential, and force "local" file detection
    # so the tool reads bytes from disk rather than treating input as a URL.
    mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
    mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
    mocker.patch(
        "langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
        return_value="local",
    )

    tool = AzureAiServicesImageAnalysisTool(
        azure_ai_services_key=key,
        azure_ai_services_endpoint=endpoint,
        visual_features=["PEOPLE", "CAPTION", "SMARTCROPS"],
    )

    analyze_result = mocker.Mock()
    analyze_result.caption.text = "A building corner."

    # Features that were not requested (or returned nothing) are None, so the
    # tool skips them when building its result dict.
    analyze_result.objects = None
    analyze_result.tags = None
    analyze_result.read = None
    analyze_result.dense_captions = None

    # The tool calls len(result.smart_crops) and then iterates `.list`, so the
    # mock needs both a __len__ and a `list` attribute (MagicMock gives us
    # configurable dunders; plain Mock does not support __len__).
    mock_smart_crops = mocker.MagicMock()
    mock_smart_crops.list = [
        {"aspectRatio": 1.97, "boundingBox": {"x": 43, "y": 24, "w": 853, "h": 432}}
    ]
    mock_smart_crops.__len__.return_value = 1
    analyze_result.smart_crops = mock_smart_crops

    mock_people = mocker.MagicMock()
    mock_people.list = [
        {
            "boundingBox": {"x": 454, "y": 44, "w": 408, "h": 531},
            "confidence": 0.9601945281028748,
        },
    ]
    mock_people.__len__.return_value = 1
    analyze_result.people = mock_people

    mock_content_client = mocker.Mock()
    mock_content_client.analyze.return_value = analyze_result

    tool.image_analysis_client = mock_content_client

    # Renamed from `input` — don't shadow the builtin.
    image_path = str(building_path)
    expected = (
        "Caption: A building corner.\n"
        "Smart Crops: {'aspectRatio': 1.97,"
        " 'boundingBox': {'x': 43, 'y': 24, 'w': 853, 'h': 432}}\n"
        "People: {'boundingBox': {'x': 454, 'y': 44, 'w': 408, 'h': 531},"
        " 'confidence': 0.9601945281028748}"
    )

    assert tool._run(image_path) == expected

0 comments on commit 580a8d5

Please sign in to comment.