From f4a07ff0eda2f9c9531e9ba20d1fe36bf1ccf33b Mon Sep 17 00:00:00 2001 From: David Luong Date: Wed, 1 May 2024 21:30:42 -0400 Subject: [PATCH] [.Net] Support raw-data in ImageMessage (#2552) * update * add sample project * revert notebook change back * update * update interactive version * add nuget package * refactor Message * update example * add azure nightly build pipeline * Set up CI with Azure Pipelines [skip ci] * Update nightly-build.yml for Azure Pipelines * add dotnet interactive package * add dotnet interactive package * update pipeline * add nuget feed back * remove dotnet-tool feed * remove dotnet-tool feed comment * update pipeline * update build name * Update nightly-build.yml * Delete .github/workflows/dotnet-ci.yml * update * add working_dir to use step * add initateChat api * update oai package * Update dotnet-build.yml * Update dotnet-run-openai-test-and-notebooks.yml * update build workflow * update build workflow * update nuget feed * update nuget feed * update aoai and sk version * Update InteractiveService.cs * add support for GPT 4V * add DalleAndGPT4V example * update example * add user proxy agent * add readme * bump version * update example * add dotnet interactive hook * update * udpate tests * add website * update index.md * add docs * update doc * move sk dependency out of core package * udpate doc * Update Use-function-call.md * add type safe function call document * update doc * update doc * add dock * Update Use-function-call.md * add GenerateReplyOptions * remove IChatLLM * update version * update doc * update website * add sample * fix link * add middleware agent * clean up doc * bump version * update doc * update * add Other Language * remove warnings * add sign.props * add sign step * fix pipelien * auth * real sign * disable PR trigger * update * disable PR trigger * use microbuild machine * update build pipeline to add publish to internal feed * add internal feed * fix build pipeline * add dotnet prefix * update ci * add build 
number * update run number * update source * update token * update * remove adding source * add publish to github package * try again * try again * ask for write pacakge * disable package when branch is not main * update * implement streaming agent * add test for streaming function call * update * fix #1588 * enable PR check for dotnet branch * add website readme * only publish to dotnet feed when pushing to dotnet branch * remove openai-test-and-notebooks workflow * update readme * update readme * update workflow * update getting-start * upgrade test and sample proejct to use .net 8 * fix global.json format && make loadFromConfig API internal only before implementing * update * add support for LM studio * add doc * Update README.md * add push and workflow_dispatch trigger * disable PR for main * add dotnet env * Update Installation.md * add nuget * refer to newtonsoft 13 * update branch to dotnet in docfx * Update Installation.md * pull out HumanInputMiddleware and FunctionCallMiddleware * fix tests * add link to sample folder * refactor message * refactor over IMessage * add more tests * add more test * fix build error * rename header * add semantic kernel project * update sk example * update dotnet version * add LMStudio function call example * rename LLaMAFunctin * remove dotnet run openai test and notebook workflow * add FunctionContract and test * update doc * add documents * add workflow * update * update sample * fix warning in test * reult length can be less then maximumOutputToKeep (#1804) * merge with main * add option to retrieve inner agent and middlewares from MiddlewareAgent * update doc * adjust namespace * update readme * fix test * use IMessage * more updates * update * fix test * add comments * use FunctionContract to replace FunctionDefinition * move AutoGen contrac to AutoGen.Core * update installation * refactor streamingAgent by adding StreamingMessage type * update sample * update samples * update * update * add test * fix test * bump 
version * add openaichat test * update * Update Example03_Agent_FunctionCall.cs * [.Net] improve docs (#1862) * add doc * add doc * add doc * add doc * add doc * add doc * update * fix test error * fix some error * fix test * fix test * add more tests * edits --------- Co-authored-by: ekzhu * [.Net] Add fill form example (#1911) * add form filler example * update * fix ci error * [.Net] Add using AutoGen.Core in source generator (#1983) * fix using namespace bug in source generator * remove using in sourcegenerator test * disable PR test * Add .idea to .gitignore (#1988) * [.Net] publish to nuget.org feed (#1987) * publish to nuget * update ci * update dotnet-release * update release pipeline * add source * remove empty symbol package * update pipeline * remove tag * update installation guide * [.Net] Rename some classes && APIs based on doc review (#1980) * rename sequential group chat to round robin group chat * rename to sendInstruction * rename workflow to graph * rename some api * bump version * move Graph to GroupChat folder * rename fill application example * [.Net] Improve package description (#2161) * add discord link and update package description * Update getting-start.md * [.Net] Fix document comment from the most recent AutoGen.Net engineer sync (#2231) * update * rename RegisterPrintMessageHook to RegisterPrintMessage * update website * update update.md * fix link error * [.Net] Enable JsonMode and deterministic output in AutoGen.OpenAI OpenAIChatAgent (#2347) * update openai version && add sample for json output * add example in web * update update.md * update image url * [.Net] Add AutoGen.Mistral package (#2330) * add mstral client * enable streaming support * add mistralClientAgent * add test for function call * add extension * add support for toolcall and toolcall result message * add support for aggregate message * implement streaming function call * track (#2471) * [.Net] add mistral example (#2482) * update existing examples to use 
messageCOnnector * add overview * add function call document * add example 14 * add mistral token count usage example * update version * Update dotnet-release.yml (#2488) * update * revert gitattributes * WIP : Binary ImageMessage * WIP : Able to pass unit test * Add example, cover more usages * Rename File --------- Co-authored-by: XiaoYun Zhang Co-authored-by: Xiaoyun Zhang Co-authored-by: mhensen Co-authored-by: ekzhu Co-authored-by: Krzysztof Kasprowicz <60486987+Krzysztof318@users.noreply.github.com> Co-authored-by: luongdavid --- .../AutoGen.BasicSample.csproj | 6 ++ .../Example15_GPT4V_BinaryDataImageMessage.cs | 62 +++++++++++++++++++ .../ImageResources/square.png | 3 + dotnet/src/AutoGen.Core/AutoGen.Core.csproj | 1 + .../src/AutoGen.Core/Message/ImageMessage.cs | 31 +++++++++- .../DTOs/ChatCompletionResponse.cs | 2 +- dotnet/src/AutoGen.Mistral/DTOs/Error.cs | 2 +- dotnet/src/AutoGen.Mistral/DTOs/Model.cs | 2 +- .../Extension/MessageExtension.cs | 4 +- .../OpenAIChatRequestMessageConnector.cs | 4 +- ...manticKernelChatMessageContentConnector.cs | 7 +-- .../AutoGen.Tests/ApprovalTests/square.png | 3 + .../test/AutoGen.Tests/AutoGen.Tests.csproj | 6 ++ dotnet/test/AutoGen.Tests/BasicSampleTest.cs | 6 ++ dotnet/test/AutoGen.Tests/SingleAgentTest.cs | 14 +++++ 15 files changed, 140 insertions(+), 13 deletions(-) create mode 100644 dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs create mode 100644 dotnet/sample/AutoGen.BasicSamples/ImageResources/square.png create mode 100644 dotnet/test/AutoGen.Tests/ApprovalTests/square.png diff --git a/dotnet/sample/AutoGen.BasicSamples/AutoGen.BasicSample.csproj b/dotnet/sample/AutoGen.BasicSamples/AutoGen.BasicSample.csproj index c4e41261933..3c2b5166988 100644 --- a/dotnet/sample/AutoGen.BasicSamples/AutoGen.BasicSample.csproj +++ b/dotnet/sample/AutoGen.BasicSamples/AutoGen.BasicSample.csproj @@ -16,4 +16,10 @@ + + + + PreserveNewest + + diff --git 
a/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs b/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs
new file mode 100644
index 00000000000..7a3422cb863
--- /dev/null
+++ b/dotnet/sample/AutoGen.BasicSamples/Example15_GPT4V_BinaryDataImageMessage.cs
@@ -0,0 +1,70 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Example15_GPT4V_BinaryDataImageMessage.cs
+
+using AutoGen.Core;
+using AutoGen.OpenAI;
+
+namespace AutoGen.BasicSample;
+
+/// <summary>
+/// This example shows usage of ImageMessage. The image is loaded as BinaryData and sent to GPT-4V.
+/// <para>
+/// Add additional images to the ImageResources folder to load and send more images to GPT-4V.
+/// </para>
+/// </summary>
+public static class Example15_GPT4V_BinaryDataImageMessage
+{
+    private static readonly string ImageResourcePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ImageResources");
+
+    // Maps a lower-cased file extension to the media type attached to the BinaryData payload.
+    private static readonly Dictionary<string, string> _mediaTypeMappings = new()
+    {
+        { ".png", "image/png" },
+        { ".jpeg", "image/jpeg" },
+        { ".jpg", "image/jpeg" },
+        { ".gif", "image/gif" },
+        { ".webp", "image/webp" }
+    };
+
+    /// <summary>
+    /// Sends a text question plus every supported image under ImageResources to the vision agent as one multi-modal message.
+    /// </summary>
+    public static async Task RunAsync()
+    {
+        var openAIKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? throw new InvalidOperationException("Please set OPENAI_API_KEY environment variable.");
+        var openAiConfig = new OpenAIConfig(openAIKey, "gpt-4-vision-preview");
+
+        var visionAgent = new GPTAgent(
+            name: "gpt",
+            systemMessage: "You are a helpful AI assistant",
+            config: openAiConfig,
+            temperature: 0);
+
+        List<IMessage> messages =
+            [new TextMessage(Role.User, "What is this image?", from: "user")];
+        AddMessagesFromResource(ImageResourcePath, messages);
+
+        var multiModalMessage = new MultiModalMessage(Role.User, messages, from: "user");
+        var response = await visionAgent.SendAsync(multiModalMessage);
+    }
+
+    /// <summary>
+    /// Appends one ImageMessage per file in the folder whose extension has a known media type; other files are skipped.
+    /// </summary>
+    private static void AddMessagesFromResource(string imageResourcePath, List<IMessage> messages)
+    {
+        foreach (string file in Directory.GetFiles(imageResourcePath))
+        {
+            if (!_mediaTypeMappings.TryGetValue(Path.GetExtension(file).ToLowerInvariant(), out var mediaType))
+            {
+                continue;
+            }
+
+            // Wrap the file bytes directly; no intermediate stream is left undisposed.
+            var imageData = BinaryData.FromBytes(File.ReadAllBytes(file), mediaType);
+
+            // Role.User matches from: "user" and the role of the enclosing MultiModalMessage.
+            messages.Add(new ImageMessage(Role.User, imageData, from: "user"));
+        }
+    }
+}
diff --git a/dotnet/sample/AutoGen.BasicSamples/ImageResources/square.png b/dotnet/sample/AutoGen.BasicSamples/ImageResources/square.png new file mode 100644 index 00000000000..afb4f4cd4df --- /dev/null +++ b/dotnet/sample/AutoGen.BasicSamples/ImageResources/square.png @@ -0,0 +1,3 @@ +version
https://git-lfs.github.com/spec/v1 +oid sha256:8323d0b8eceb752e14c29543b2e28bb2fc648ed9719095c31b7708867a4dc918 +size 491 diff --git a/dotnet/src/AutoGen.Core/AutoGen.Core.csproj b/dotnet/src/AutoGen.Core/AutoGen.Core.csproj index 409b6bc1aaf..ebbec3f0a46 100644 --- a/dotnet/src/AutoGen.Core/AutoGen.Core.csproj +++ b/dotnet/src/AutoGen.Core/AutoGen.Core.csproj @@ -16,6 +16,7 @@ +
diff --git a/dotnet/src/AutoGen.Core/Message/ImageMessage.cs b/dotnet/src/AutoGen.Core/Message/ImageMessage.cs
index 18ceea0d111..1239785c411 100644
--- a/dotnet/src/AutoGen.Core/Message/ImageMessage.cs
+++ b/dotnet/src/AutoGen.Core/Message/ImageMessage.cs
@@ -21,14 +21,63 @@ public ImageMessage(Role role, Uri uri, string? from = null)
         this.Url = uri.ToString();
     }
 
+    /// <summary>
+    /// Create an image message from raw image bytes.
+    /// </summary>
+    /// <param name="role">role of the message.</param>
+    /// <param name="data">image bytes; must be non-empty and carry a media type.</param>
+    /// <param name="from">name of the sender, if any.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
+    /// <exception cref="ArgumentException"><paramref name="data"/> is empty or has no media type.</exception>
+    public ImageMessage(Role role, BinaryData data, string? from = null)
+    {
+        if (data is null)
+        {
+            throw new ArgumentNullException(nameof(data));
+        }
+
+        if (data.IsEmpty)
+        {
+            throw new ArgumentException("Data cannot be empty", nameof(data));
+        }
+
+        if (string.IsNullOrWhiteSpace(data.MediaType))
+        {
+            throw new ArgumentException("MediaType is needed for DataUri Images", nameof(data));
+        }
+
+        this.Role = role;
+        this.From = from;
+        this.Data = data;
+    }
+
     public Role Role { get; set; }
 
-    public string Url { get; set; }
+    public string? Url { get; set; }
 
     public string? From { get; set; }
 
+    /// <summary>
+    /// Raw image bytes together with their media type; assigned by the BinaryData constructor.
+    /// </summary>
+    public BinaryData? Data { get; set; }
+
+    /// <summary>
+    /// Build a base64 data URI of the form data:{media type};base64,{payload} from <see cref="Data"/>.
+    /// </summary>
+    /// <exception cref="InvalidOperationException"><see cref="Data"/> is null.</exception>
+    public string BuildDataUri()
+    {
+        if (this.Data is null)
+        {
+            throw new InvalidOperationException($"{nameof(Data)} must be set to build a data URI");
+        }
+
+        return $"data:{this.Data.MediaType};base64,{Convert.ToBase64String(this.Data.ToArray())}";
+    }
+
     public override string ToString()
     {
-        return $"ImageMessage({this.Role}, {this.Url}, {this.From})";
+        return $"ImageMessage({this.Role}, {(this.Data != null ? BuildDataUri() : this.Url) ?? string.Empty}, {this.From})";
     }
 }
diff --git a/dotnet/src/AutoGen.Mistral/DTOs/ChatCompletionResponse.cs b/dotnet/src/AutoGen.Mistral/DTOs/ChatCompletionResponse.cs index ff241f8d340..13e29e7139b 100644 --- a/dotnet/src/AutoGen.Mistral/DTOs/ChatCompletionResponse.cs +++ b/dotnet/src/AutoGen.Mistral/DTOs/ChatCompletionResponse.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Microsoft Corporation. All rights reserved. // ChatCompletionResponse.cs using System.Collections.Generic; diff --git a/dotnet/src/AutoGen.Mistral/DTOs/Error.cs b/dotnet/src/AutoGen.Mistral/DTOs/Error.cs index 77eb2d341fb..8bddcfc776c 100644 --- a/dotnet/src/AutoGen.Mistral/DTOs/Error.cs +++ b/dotnet/src/AutoGen.Mistral/DTOs/Error.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Microsoft Corporation. All rights reserved. // Error.cs using System.Text.Json.Serialization; diff --git a/dotnet/src/AutoGen.Mistral/DTOs/Model.cs b/dotnet/src/AutoGen.Mistral/DTOs/Model.cs index 915d2f737ec..70a4b3c997d 100644 --- a/dotnet/src/AutoGen.Mistral/DTOs/Model.cs +++ b/dotnet/src/AutoGen.Mistral/DTOs/Model.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Microsoft Corporation. All rights reserved. // Model.cs using System; diff --git a/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs b/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs index 92e0f3776f5..b3dfb1e8668 100644 --- a/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs +++ b/dotnet/src/AutoGen.OpenAI/Extension/MessageExtension.cs @@ -77,7 +77,7 @@ public static IEnumerable ToOpenAIChatRequestMessage(this IA else if (message is ImageMessage imageMessage) { // multi-modal - var msg = new ChatRequestUserMessage(new ChatMessageImageContentItem(new Uri(imageMessage.Url))); + var msg = new ChatRequestUserMessage(new ChatMessageImageContentItem(new Uri(imageMessage.Url ??
imageMessage.BuildDataUri()))); return [msg]; } @@ -101,7 +101,7 @@ public static IEnumerable ToOpenAIChatRequestMessage(this IA return m switch { TextMessage textMessage => new ChatMessageTextContentItem(textMessage.Content), - ImageMessage imageMessage => new ChatMessageImageContentItem(new Uri(imageMessage.Url)), + ImageMessage imageMessage => new ChatMessageImageContentItem(new Uri(imageMessage.Url ?? imageMessage.BuildDataUri())), _ => throw new ArgumentException($"Unknown message type: {m.GetType()}") }; }); diff --git a/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs b/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs index c1581cbec08..1276e93f9fb 100644 --- a/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs +++ b/dotnet/src/AutoGen.OpenAI/Middleware/OpenAIChatRequestMessageConnector.cs @@ -336,7 +336,7 @@ private IEnumerable ProcessIncomingMessagesForOther(TextMess private IEnumerable ProcessIncomingMessagesForOther(ImageMessage message) { return new[] { new ChatRequestUserMessage([ - new ChatMessageImageContentItem(new Uri(message.Url)), + new ChatMessageImageContentItem(new Uri(message.Url ?? message.BuildDataUri())), ])}; } @@ -345,7 +345,7 @@ private IEnumerable ProcessIncomingMessagesForOther(MultiMod IEnumerable items = message.Content.Select(ci => ci switch { TextMessage text => new ChatMessageTextContentItem(text.Content), - ImageMessage image => new ChatMessageImageContentItem(new Uri(image.Url)), + ImageMessage image => new ChatMessageImageContentItem(new Uri(image.Url ?? 
image.BuildDataUri())), _ => throw new NotImplementedException(), });
diff --git a/dotnet/src/AutoGen.SemanticKernel/Middleware/SemanticKernelChatMessageContentConnector.cs b/dotnet/src/AutoGen.SemanticKernel/Middleware/SemanticKernelChatMessageContentConnector.cs
index e4b7527cd05..557683c9615 100644
--- a/dotnet/src/AutoGen.SemanticKernel/Middleware/SemanticKernelChatMessageContentConnector.cs
+++ b/dotnet/src/AutoGen.SemanticKernel/Middleware/SemanticKernelChatMessageContentConnector.cs
@@ -92,7 +92,7 @@ private IMessage PostProcessMessage(IMessage messageEnvelope
             {
                 TextContent txt => new TextMessage(Role.Assistant, txt.Text!, messageEnvelope.From),
                 ImageContent img when img.Uri is Uri uri => new ImageMessage(Role.Assistant, uri.ToString(), from: messageEnvelope.From),
-                ImageContent img when img.Uri is null => throw new InvalidOperationException("ImageContent.Uri is null"),
+                ImageContent img when img.Data is ReadOnlyMemory<byte> data => new ImageMessage(Role.Assistant, BinaryData.FromBytes(data, img.MimeType), from: messageEnvelope.From), // ImageMessage rejects BinaryData without a media type, so forward the content's MimeType
                 _ => throw new InvalidOperationException("Unsupported content type"),
             });
 
@@ -185,9 +185,8 @@ private IEnumerable ProcessMessageForOthers(TextMessage mess private IEnumerable ProcessMessageForOthers(ImageMessage message) { - var imageContent = new ImageContent(new Uri(message.Url)); var collectionItems = new ChatMessageContentItemCollection(); - collectionItems.Add(imageContent); + collectionItems.Add(new ImageContent(new Uri(message.Url ?? message.BuildDataUri()))); return [new ChatMessageContent(AuthorRole.User, collectionItems)]; } @@ -207,7 +206,7 @@ private IEnumerable ProcessMessageForOthers(MultiModalMessag } else if (item is ImageMessage imageContent) { - collections.Add(new ImageContent(new Uri(imageContent.Url))); + collections.Add(new ImageContent(new Uri(imageContent.Url ??
imageContent.BuildDataUri()))); } else { diff --git a/dotnet/test/AutoGen.Tests/ApprovalTests/square.png b/dotnet/test/AutoGen.Tests/ApprovalTests/square.png new file mode 100644 index 00000000000..afb4f4cd4df --- /dev/null +++ b/dotnet/test/AutoGen.Tests/ApprovalTests/square.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8323d0b8eceb752e14c29543b2e28bb2fc648ed9719095c31b7708867a4dc918 +size 491 diff --git a/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj b/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj index f7e6b036506..9a7b07b34dd 100644 --- a/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj +++ b/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj @@ -21,4 +21,10 @@ + + + PreserveNewest + + + diff --git a/dotnet/test/AutoGen.Tests/BasicSampleTest.cs b/dotnet/test/AutoGen.Tests/BasicSampleTest.cs index 19de2bdef4b..b9eea67397c 100644 --- a/dotnet/test/AutoGen.Tests/BasicSampleTest.cs +++ b/dotnet/test/AutoGen.Tests/BasicSampleTest.cs @@ -68,6 +68,12 @@ public async Task DalleAndGPT4VTestAsync() await Example05_Dalle_And_GPT4V.RunAsync(); } + [ApiKeyFact("OPENAI_API_KEY")] + public async Task GPT4ImageMessage() + { + await Example15_GPT4V_BinaryDataImageMessage.RunAsync(); + } + public class ConsoleWriter : StringWriter { private ITestOutputHelper output; diff --git a/dotnet/test/AutoGen.Tests/SingleAgentTest.cs b/dotnet/test/AutoGen.Tests/SingleAgentTest.cs index d314b391bae..6dfb61761eb 100644 --- a/dotnet/test/AutoGen.Tests/SingleAgentTest.cs +++ b/dotnet/test/AutoGen.Tests/SingleAgentTest.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Threading.Tasks; using AutoGen.OpenAI; @@ -80,11 +81,24 @@ public async Task GPTAgentVisionTestAsync() var imageMessage = new ImageMessage(Role.User, imageUri, from: "user"); + string imagePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ApprovalTests", "square.png"); + ImageMessage imageMessageData; + using (var fs = new 
FileStream(imagePath, FileMode.Open, FileAccess.Read)) + { + var ms = new MemoryStream(); + await fs.CopyToAsync(ms); + ms.Seek(0, SeekOrigin.Begin); + var imageData = await BinaryData.FromStreamAsync(ms, "image/png"); + imageMessageData = new ImageMessage(Role.Assistant, imageData, from: "user"); + } + IMessage[] messages = [ MessageEnvelope.Create(oaiMessage), multiModalMessage, imageMessage, + imageMessageData ]; + foreach (var message in messages) { var response = await visionAgent.SendAsync(message);