diff --git a/dotnet/sample/AutoGen.BasicSamples/AutoGen.BasicSample.csproj b/dotnet/sample/AutoGen.BasicSamples/AutoGen.BasicSample.csproj
index c4e41261933..3c2b5166988 100644
--- a/dotnet/sample/AutoGen.BasicSamples/AutoGen.BasicSample.csproj
+++ b/dotnet/sample/AutoGen.BasicSamples/AutoGen.BasicSample.csproj
@@ -16,4 +16,10 @@
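+
+  <ItemGroup>
+    <None Update="ImageResources\square.png">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>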
diff --git a/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs b/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs
new file mode 100644
index 00000000000..7a3422cb863
--- /dev/null
+++ b/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs
@@ -0,0 +1,62 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Example15_GPT4V_BinaryDataImageMessage.cs
+
+using AutoGen.Core;
+using AutoGen.OpenAI;
+
+namespace AutoGen.BasicSample;
+
+/// <summary>
+/// This example shows usage of ImageMessage. The image is loaded as BinaryData and sent to GPT-4V.
+/// </summary>
+/// <remarks>
+/// Add additional images to the ImageResources folder to load and send more images to GPT-4V.
+/// </remarks>
+public static class Example15_GPT4V_BinaryDataImageMessage
+{
+    // Folder next to the built binaries that holds the sample images.
+    private static readonly string ImageResourcePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ImageResources");
+
+    // Supported image extensions -> media type; keys compare case-insensitively.
+    private static readonly Dictionary<string, string> _mediaTypeMappings = new(StringComparer.OrdinalIgnoreCase)
+    {
+        { ".png", "image/png" },
+        { ".jpeg", "image/jpeg" },
+        { ".jpg", "image/jpeg" },
+        { ".gif", "image/gif" },
+        { ".webp", "image/webp" }
+    };
+
+    /// <summary>Sends a question plus every supported image in the resource folder to GPT-4V as one multimodal message.</summary>
+    public static async Task RunAsync()
+    {
+        var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new InvalidOperationException("Please set OPENAI_API_KEY environment variable.");
+        var openAiConfig = new OpenAIConfig(openAIKey, "gpt-4-vision-preview");
+
+        var visionAgent = new GPTAgent(name: "gpt", systemMessage: "You are a helpful AI assistant", config: openAiConfig, temperature: 0);
+
+        List<IMessage> messages = [new TextMessage(Role.User, "What is this image?", from: "user")];
+        AddMessagesFromResource(ImageResourcePath, messages);
+
+        var multiModalMessage = new MultiModalMessage(Role.User, messages, from: "user");
+        var response = await visionAgent.SendAsync(multiModalMessage);
+    }
+
+    /// <summary>Appends one <see cref="ImageMessage"/> to <paramref name="messages"/> per supported image file in the folder.</summary>
+    private static void AddMessagesFromResource(string imageResourcePath, List<IMessage> messages)
+    {
+        foreach (string file in Directory.GetFiles(imageResourcePath))
+        {
+            // Files whose extension has no known media type are skipped.
+            if (!_mediaTypeMappings.TryGetValue(Path.GetExtension(file), out var mediaType))
+            {
+                continue;
+            }
+
+            // Read the bytes directly so no intermediate stream is left undisposed.
+            var imageData = BinaryData.FromBytes(File.ReadAllBytes(file), mediaType);
+            // The image is user-supplied content, so it carries the user role like its sender name.
+            messages.Add(new ImageMessage(Role.User, imageData, from: "user"));
+        }
+    }
+}
diff --git a/dotnet/sample/AutoGen.BasicSamples/ImageResources/square.png b/dotnet/sample/AutoGen.BasicSamples/ImageResources/square.png
new file mode 100644
index 00000000000..afb4f4cd4df
--- /dev/null
+++ b/dotnet/sample/AutoGen.BasicSamples/ImageResources/square.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8323d0b8eceb752e14c29543b2e28bb2fc648ed9719095c31b7708867a4dc918
+size 491
diff --git a/dotnet/src/AutoGen.Core/AutoGen.Core.csproj b/dotnet/src/AutoGen.Core/AutoGen.Core.csproj
index 409b6bc1aaf..ebbec3f0a46 100644
--- a/dotnet/src/AutoGen.Core/AutoGen.Core.csproj
+++ b/dotnet/src/AutoGen.Core/AutoGen.Core.csproj
@@ -16,6 +16,7 @@
+
diff --git a/dotnet/src/AutoGen.Core/Message/ImageMessage.cs b/dotnet/src/AutoGen.Core/Message/ImageMessage.cs
index 18ceea0d111..1239785c411 100644
--- a/dotnet/src/AutoGen.Core/Message/ImageMessage.cs
+++ b/dotnet/src/AutoGen.Core/Message/ImageMessage.cs
@@ -21,14 +21,41 @@ public ImageMessage(Role role, Uri uri, string? from = null)
this.Url = uri.ToString();
}
+    /// <summary>
+    /// Creates an image message whose image is carried inline as binary data rather than referenced by URL.
+    /// </summary>
+    /// <param name="role">Role of the message sender.</param>
+    /// <param name="data">Image bytes; must be non-empty and carry a media type (for example "image/png").</param>
+    /// <param name="from">Optional name of the sender.</param>
+    /// <exception cref="ArgumentNullException">Thrown when <paramref name="data"/> is null.</exception>
+    /// <exception cref="ArgumentException">Thrown when <paramref name="data"/> is empty or has no media type.</exception>
+    public ImageMessage(Role role, BinaryData data, string? from = null)
+    {
+        // Fail with a descriptive argument error instead of a NullReferenceException from data.IsEmpty.
+        if (data is null)
+        {
+            throw new ArgumentNullException(nameof(data));
+        }
+
+        if (data.IsEmpty)
+        {
+            throw new ArgumentException("Data cannot be empty", nameof(data));
+        }
+
+        // The media type is required because BuildDataUri embeds it in the data URI.
+        if (string.IsNullOrWhiteSpace(data.MediaType))
+        {
+            throw new ArgumentException("MediaType is needed for DataUri Images", nameof(data));
+        }
+
+        this.Role = role;
+        this.From = from;
+        this.Data = data;
+    }
+
public Role Role { get; set; }
+ // Image location as a URL string. The BinaryData constructor does not assign it,
+ // which is why the property is now nullable.
- public string Url { get; set; }
+ public string? Url { get; set; }
public string? From { get; set; }
+ // Inline image payload assigned by the BinaryData constructor; the Uri constructor
+ // visible in this change does not assign it.
+ public BinaryData? Data { get; set; }
+
+    /// <summary>
+    /// Builds a data URI of the form "data:{media type};base64,{payload}" from <see cref="Data"/>.
+    /// </summary>
+    /// <returns>The data URI string for the inline image.</returns>
+    /// <exception cref="InvalidOperationException">Thrown when <see cref="Data"/> is null.</exception>
+    public string BuildDataUri()
+    {
+        if (this.Data is null)
+        {
+            // A missing payload is invalid object state; NullReferenceException is reserved for the runtime.
+            throw new InvalidOperationException($"{nameof(Data)} is null; a data URI can only be built when binary image data has been set.");
+        }
+
+        return $"data:{this.Data.MediaType};base64,{Convert.ToBase64String(this.Data.ToArray())}";
+    }
+
public override string ToString()
{
- return $"ImageMessage({this.Role}, {this.Url}, {this.From})";
+        // Show the inline data URI when binary data is present; otherwise the URL, or empty when neither is set.
+        var location = this.Data is null ? (this.Url ?? string.Empty) : this.BuildDataUri();
+        return $"ImageMessage({this.Role}, {location}, {this.From})";
}
}
diff --git a/dotnet/src/AutoGen.Mistral/DTOs/ChatCompletionResponse.cs b/dotnet/src/AutoGen.Mistral/DTOs/ChatCompletionResponse.cs
index ff241f8d340..13e29e7139b 100644
--- a/dotnet/src/AutoGen.Mistral/DTOs/ChatCompletionResponse.cs
+++ b/dotnet/src/AutoGen.Mistral/DTOs/ChatCompletionResponse.cs
@@ -1,4 +1,4 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) Microsoft Corporation. All rights reserved.
// ChatCompletionResponse.cs
using System.Collections.Generic;
diff --git a/dotnet/src/AutoGen.Mistral/DTOs/Error.cs b/dotnet/src/AutoGen.Mistral/DTOs/Error.cs
index 77eb2d341fb..8bddcfc776c 100644
--- a/dotnet/src/AutoGen.Mistral/DTOs/Error.cs
+++ b/dotnet/src/AutoGen.Mistral/DTOs/Error.cs
@@ -1,4 +1,4 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) Microsoft Corporation. All rights reserved.
// Error.cs
using System.Text.Json.Serialization;
diff --git a/dotnet/src/AutoGen.Mistral/DTOs/Model.cs b/dotnet/src/AutoGen.Mistral/DTOs/Model.cs
index 915d2f737ec..70a4b3c997d 100644
--- a/dotnet/src/AutoGen.Mistral/DTOs/Model.cs
+++ b/dotnet/src/AutoGen.Mistral/DTOs/Model.cs
@@ -1,4 +1,4 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
+// Copyright (c) Microsoft Corporation. All rights reserved.
// Model.cs
using System;
diff --git a/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs b/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs
index 92e0f3776f5..b3dfb1e8668 100644
--- a/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs
+++ b/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs
@@ -77,7 +77,7 @@ public static IEnumerable ToOpenAIChatRequestMessage(this IA
else if (message is ImageMessage imageMessage)
{
// multi-modal
- var msg = new ChatRequestUserMessage(new ChatMessageImageContentItem(new Uri(imageMessage.Url)));
+ var msg = new ChatRequestUserMessage(new ChatMessageImageContentItem(new Uri(imageMessage.Url ?? imageMessage.BuildDataUri())));
return [msg];
}
@@ -101,7 +101,7 @@ public static IEnumerable ToOpenAIChatRequestMessage(this IA
return m switch
{
TextMessage textMessage => new ChatMessageTextContentItem(textMessage.Content),
- ImageMessage imageMessage => new ChatMessageImageContentItem(new Uri(imageMessage.Url)),
+ ImageMessage imageMessage => new ChatMessageImageContentItem(new Uri(imageMessage.Url ?? imageMessage.BuildDataUri())),
_ => throw new ArgumentException($"Unknown message type: {m.GetType()}")
};
});
diff --git a/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs b/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs
index c1581cbec08..1276e93f9fb 100644
--- a/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs
+++ b/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs
@@ -336,7 +336,7 @@ private IEnumerable ProcessIncomingMessagesForOther(TextMess
private IEnumerable ProcessIncomingMessagesForOther(ImageMessage message)
{
return new[] { new ChatRequestUserMessage([
- new ChatMessageImageContentItem(new Uri(message.Url)),
+ // Url is not set for messages built from BinaryData; fall back to an inline data URI.
+ // NOTE(review): System.Uri may reject very long strings, so large images could fail here - confirm.
+ new ChatMessageImageContentItem(new Uri(message.Url ?? message.BuildDataUri())),
])};
}
@@ -345,7 +345,7 @@ private IEnumerable ProcessIncomingMessagesForOther(MultiMod
IEnumerable items = message.Content.Select(ci => ci switch
{
TextMessage text => new ChatMessageTextContentItem(text.Content),
- ImageMessage image => new ChatMessageImageContentItem(new Uri(image.Url)),
+ ImageMessage image => new ChatMessageImageContentItem(new Uri(image.Url ?? image.BuildDataUri())),
_ => throw new NotImplementedException(),
});
diff --git a/dotnet/src/AutoGen.SemanticKernel/Middleware/SemanticKernelChatMessageContentConnector.cs b/dotnet/src/AutoGen.SemanticKernel/Middleware/SemanticKernelChatMessageContentConnector.cs
index e4b7527cd05..557683c9615 100644
--- a/dotnet/src/AutoGen.SemanticKernel/Middleware/SemanticKernelChatMessageContentConnector.cs
+++ b/dotnet/src/AutoGen.SemanticKernel/Middleware/SemanticKernelChatMessageContentConnector.cs
@@ -92,7 +92,7 @@ private IMessage PostProcessMessage(IMessage messageEnvelope
{
TextContent txt => new TextMessage(Role.Assistant, txt.Text!, messageEnvelope.From),
ImageContent img when img.Uri is Uri uri => new ImageMessage(Role.Assistant, uri.ToString(), from: messageEnvelope.From),
- ImageContent img when img.Uri is null => throw new InvalidOperationException("ImageContent.Uri is null"),
+ ImageContent img when img.Data is ReadOnlyMemory data => new ImageMessage(Role.Assistant, BinaryData.FromBytes(data), from: messageEnvelope.From),
_ => throw new InvalidOperationException("Unsupported content type"),
});
@@ -185,9 +185,8 @@ private IEnumerable ProcessMessageForOthers(TextMessage mess
private IEnumerable ProcessMessageForOthers(ImageMessage message)
{
- var imageContent = new ImageContent(new Uri(message.Url));
var collectionItems = new ChatMessageContentItemCollection();
- collectionItems.Add(imageContent);
+ // Url is not set for messages built from BinaryData; fall back to an inline data URI.
+ // NOTE(review): System.Uri may reject very long strings, so large images could fail here - confirm.
+ collectionItems.Add(new ImageContent(new Uri(message.Url ?? message.BuildDataUri())));
return [new ChatMessageContent(AuthorRole.User, collectionItems)];
}
@@ -207,7 +206,7 @@ private IEnumerable ProcessMessageForOthers(MultiModalMessag
}
else if (item is ImageMessage imageContent)
{
- collections.Add(new ImageContent(new Uri(imageContent.Url)));
+ collections.Add(new ImageContent(new Uri(imageContent.Url ?? imageContent.BuildDataUri())));
}
else
{
diff --git a/dotnet/test/AutoGen.Tests/ApprovalTests/square.png b/dotnet/test/AutoGen.Tests/ApprovalTests/square.png
new file mode 100644
index 00000000000..afb4f4cd4df
--- /dev/null
+++ b/dotnet/test/AutoGen.Tests/ApprovalTests/square.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8323d0b8eceb752e14c29543b2e28bb2fc648ed9719095c31b7708867a4dc918
+size 491
diff --git a/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj b/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj
index f7e6b036506..9a7b07b34dd 100644
--- a/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj
+++ b/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj
@@ -21,4 +21,10 @@
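+  <ItemGroup>
+    <None Update="ApprovalTests\square.png">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+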
diff --git a/dotnet/test/AutoGen.Tests/BasicSampleTest.cs b/dotnet/test/AutoGen.Tests/BasicSampleTest.cs
index 19de2bdef4b..b9eea67397c 100644
--- a/dotnet/test/AutoGen.Tests/BasicSampleTest.cs
+++ b/dotnet/test/AutoGen.Tests/BasicSampleTest.cs
@@ -68,6 +68,12 @@ public async Task DalleAndGPT4VTestAsync()
await Example05_Dalle_And_GPT4V.RunAsync();
}
+    // Smoke test for the BinaryData image sample; the sample reads OPENAI_API_KEY and
+    // sends a request through a GPT agent (ApiKeyFact presumably skips it when the key is unset).
+    [ApiKeyFact("OPENAI_API_KEY")]
+    public async Task GPT4ImageMessage()
+        => await Example15_GPT4V_BinaryDataImageMessage.RunAsync();
+
public class ConsoleWriter : StringWriter
{
private ITestOutputHelper output;
diff --git a/dotnet/test/AutoGen.Tests/SingleAgentTest.cs b/dotnet/test/AutoGen.Tests/SingleAgentTest.cs
index d314b391bae..6dfb61761eb 100644
--- a/dotnet/test/AutoGen.Tests/SingleAgentTest.cs
+++ b/dotnet/test/AutoGen.Tests/SingleAgentTest.cs
@@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
+using System.IO;
using System.Linq;
using System.Threading.Tasks;
using AutoGen.OpenAI;
@@ -80,11 +81,24 @@ public async Task GPTAgentVisionTestAsync()
var imageMessage = new ImageMessage(Role.User, imageUri, from: "user");
+ string imagePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ApprovalTests", "square.png");
+ ImageMessage imageMessageData;
+ using (var fs = new FileStream(imagePath, FileMode.Open, FileAccess.Read))
+ {
+ var ms = new MemoryStream();
+ await fs.CopyToAsync(ms);
+ ms.Seek(0, SeekOrigin.Begin);
+ var imageData = await BinaryData.FromStreamAsync(ms, "image/png");
+ imageMessageData = new ImageMessage(Role.Assistant, imageData, from: "user");
+ }
+
IMessage[] messages = [
MessageEnvelope.Create(oaiMessage),
multiModalMessage,
imageMessage,
+ imageMessageData
];
+
foreach (var message in messages)
{
var response = await visionAgent.SendAsync(message);