Skip to content

Commit

Permalink
feat: replace deprecated model gpt-4-vision-preview with gpt-4o
Browse files Browse the repository at this point in the history
- replace openAI patched library with last official version

resolve #30
  • Loading branch information
bsorrentino committed Jun 30, 2024
1 parent 28f55f4 commit a2795d8
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 146 deletions.
8 changes: 5 additions & 3 deletions AIAgent/Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@ let package = Package(
dependencies: [
.package(url: "https://github.com/bsorrentino/LangGraph-Swift.git", exact: "1.2.2"),
// .package(path: "/Users/bsorrentino/WORKSPACES/GITHUB.me/AppleOS/LangGraph-Swift"),
.package(url: "https://github.com/bsorrentino/Swift-OpenAI.git", branch: "develop"), // Add the dependency here
],
// .package(url: "https://github.com/bsorrentino/Swift-OpenAI.git", branch: "develop"), // Add the dependency here
.package(url: "https://github.com/MacPaw/OpenAI.git", branch: "main")
],
targets: [
// Targets are the basic building blocks of a package, defining a module or a test suite.
// Targets can depend on other targets in this package and products from dependencies.
.target(
name: "AIAgent",
dependencies: [
.product(name: "OpenAI", package: "Swift-OpenAI"),
// .product(name: "OpenAI", package: "Swift-OpenAI"),
.product(name: "OpenAI", package: "OpenAI"),
.product(name: "LangGraph", package: "LangGraph-Swift")
], resources: [ .process("Resources")]),
.testTarget(
Expand Down
135 changes: 88 additions & 47 deletions AIAgent/Sources/AIAgent/AgentExecutor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ struct DiagramDescription : Codable {
var description: DiagramNLPDescription // NLP description
}

/// A diagram image supplied to the agent, either as raw bytes or as a remote/encoded URL string.
///
/// - `data`: the PNG (or other) image bytes captured from the drawing canvas.
/// - `url`: a string URL (or data-URL) pointing at the image.
public enum DiagramImageValue {
    /// Raw image bytes (e.g. `UIImage.pngData()` output).
    case data(Data)
    /// String URL referencing the image.
    case url(String)
}

struct AgentExecutorState : AgentState {

var data: [String : Any]
Expand All @@ -77,8 +82,8 @@ struct AgentExecutorState : AgentState {
data = initState
}

var diagramImageUrlOrData:String? {
data["diagram_image_url_or_data"] as? String
var diagramImageUrlOrData:DiagramImageValue? {
data["diagram_image_url_or_data"] as? DiagramImageValue
}

var diagramCode:String? {
Expand Down Expand Up @@ -120,24 +125,42 @@ func describeDiagramImage<T:AgentExecutorDelegate>( state: AgentExecutorState,
openAI:OpenAI,
delegate:T ) async throws -> PartialAgentState {

guard let imageUrl = state.diagramImageUrlOrData else {
guard let imageUrlValue = state.diagramImageUrlOrData else {
throw _EX("diagramImageUrlOrData not initialized!")
}

await delegate.progress("starting analyze\ndiagram 👀")

let prompt = try loadPromptFromBundle(fileName: "describe_diagram_prompt")

// Build the vision chat request for either image form.
// NOTE: both branches must use `gpt4_o` — the commit's purpose is to replace the
// deprecated `gpt-4-vision-preview` model; the `.url` branch previously still
// referenced `Model.gpt4_vision_preview`, which the OpenAI API has retired.
let query = switch( imageUrlValue ) {
case .url( let url):
    ChatQuery(messages: [
        .user(.init(content: .vision([
            .chatCompletionContentPartTextParam(.init(text: prompt)),
            .chatCompletionContentPartImageParam(.init(imageUrl: .init(url: url, detail: .auto)))
        ])))
    ], model: Model.gpt4_o, maxTokens: 2000)
case .data(let data):
    ChatQuery(messages: [
        .user(.init(content: .vision([
            .chatCompletionContentPartTextParam(.init(text: prompt)),
            .chatCompletionContentPartImageParam(.init(imageUrl: .init(url: data, detail: .auto)))
        ])))
    ], model: Model.gpt4_o, maxTokens: 2000)

}

let query = ChatQuery(
model: .gpt4_vision_preview,
messages: [
Chat(role: .user, content: [
ChatContent(text: prompt),
ChatContent(imageUrl: imageUrl )
])
],
maxTokens: 2000
)
// let query = ChatQuery(
// model: .gpt4_vision_preview,
// messages: [
// Chat(role: .user, content: [
// ChatContent(text: prompt),
// ChatContent(imageUrl: imageUrl )
// ])
// ],
// maxTokens: 2000
// )

let chatResult = try await openAI.chats(query: query)

Expand Down Expand Up @@ -179,15 +202,19 @@ func translateSequenceDiagramDescriptionToPlantUML<T:AgentExecutorDelegate>( sta
.replacingOccurrences(of: "{diagram_title}", with: diagram.title)
.replacingOccurrences(of: "{diagram_description}", with: description)

let query = ChatQuery(
model: .gpt3_5Turbo,
messages: [
Chat(role: .user, content: [
ChatContent(text: prompt),
])
],
maxTokens: 2000
)
let query = ChatQuery(messages: [
.user(.init(content: .string(prompt)))
], model: Model.gpt3_5Turbo, maxTokens: 2000)

// let query = ChatQuery(
// model: .gpt3_5Turbo,
// messages: [
// Chat(role: .user, content: [
// ChatContent(text: prompt),
// ])
// ],
// maxTokens: 2000
// )

let chatResult = try await openAI.chats(query: query)

Expand Down Expand Up @@ -224,15 +251,19 @@ func translateGenericDiagramDescriptionToPlantUML<T:AgentExecutorDelegate>( stat
prompt = prompt
.replacingOccurrences(of: "{diagram_description}", with: content)

let query = ChatQuery(
model: .gpt3_5Turbo,
messages: [
Chat(role: .user, content: [
ChatContent(text: prompt),
])
],
maxTokens: 2000
)
let query = ChatQuery(messages: [
.user(.init(content: .string(prompt)))
], model: Model.gpt3_5Turbo, maxTokens: 2000)

// let query = ChatQuery(
// model: .gpt3_5Turbo,
// messages: [
// Chat(role: .user, content: [
// ChatContent(text: prompt),
// ])
// ],
// maxTokens: 2000
// )

let chatResult = try await openAI.chats(query: query)

Expand Down Expand Up @@ -264,7 +295,7 @@ func routeDiagramTranslation( state: AgentExecutorState ) async throws -> String
}

public func runTranslateDrawingToPlantUML<T:AgentExecutorDelegate>( openAI: OpenAI,
imageUrl: String,
imageValue: DiagramImageValue,
delegate:T ) async throws -> String? {

let workflow = GraphState { AgentExecutorState() }
Expand Down Expand Up @@ -295,7 +326,7 @@ public func runTranslateDrawingToPlantUML<T:AgentExecutorDelegate>( openAI: Open
let app = try workflow.compile()

let inputs:[String : Any] = [
"diagram_image_url_or_data": imageUrl
"diagram_image_url_or_data": imageValue
]

let response = try await app.invoke( inputs: inputs)
Expand All @@ -308,20 +339,30 @@ public func updatePlantUML( openAI: OpenAI,
withModel model: Model,
input: String,
withInstruction instruction: String ) async throws -> String? {
let query = ChatQuery(
model: model,
messages: [
.init(role: .system, content:
"""
You are my plantUML assistant.
You must answer exclusively with diagram syntax.
"""),
.init( role: .assistant, content: input ),
.init( role: .user, content: instruction )
],
temperature: 0.0,
topP: 1.0
)

let query = ChatQuery(messages: [
.system(.init(content: """
You are my plantUML assistant.
You must answer exclusively with diagram syntax.
""")),
.assistant(.init( content: input)),
.user(.init(content: .string(instruction)))
], model: model, temperature: 0.0, topP: 1.0)

// let query = ChatQuery(
// model: model,
// messages: [
// .init(role: .system, content:
// """
// You are my plantUML assistant.
// You must answer exclusively with diagram syntax.
// """),
// .init( role: .assistant, content: input ),
// .init( role: .user, content: instruction )
// ],
// temperature: 0.0,
// topP: 1.0
// )

let chat = try await openAI.chats(query: query)

Expand Down
2 changes: 1 addition & 1 deletion AIAgent/Sources/AIAgent/AgentExecutorDemo.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ struct AgentExecutorDemoState : AgentState {
}

public func runTranslateDrawingToPlantUMLDemo<T:AgentExecutorDelegate>( openAI: OpenAI,
imageUrl: String,
imageValue: DiagramImageValue,
delegate:T ) async throws -> String? {

let workflow = GraphState { AgentExecutorState() }
Expand Down
7 changes: 4 additions & 3 deletions PlantUML/OpenAIObservableService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class OpenAIObservableService : ObservableObject {
// @Published public var inputModel:String

@AppStorage("openaiModel") private var openAIModel:String = "gpt-3.5-turbo"
@AppStorage("visionModel") private var visionModel:String = "gpt-4o"
@AppSecureStorage("openaikey") private var openAIKey:String?
@AppSecureStorage("openaiorg") private var openAIOrg:String?

Expand Down Expand Up @@ -140,7 +141,7 @@ class OpenAIObservableService : ObservableObject {
extension OpenAIObservableService {

@MainActor
func processImageWithAgents<T:AgentExecutorDelegate>( imageUrl: String, delegate:T ) async -> String? {
func processImageWithAgents<T:AgentExecutorDelegate>( imageData: Data, delegate:T ) async -> String? {

guard let openAI, case .Ready = status else {
delegate.progress("WARNING: OpenAI API not initialized")
Expand All @@ -152,8 +153,8 @@ extension OpenAIObservableService {
do {

async let runTranslation = DEMO_MODE ?
try runTranslateDrawingToPlantUMLDemo( openAI: openAI, imageUrl: imageUrl, delegate:delegate) :
try runTranslateDrawingToPlantUML( openAI: openAI, imageUrl: imageUrl, delegate:delegate);
try runTranslateDrawingToPlantUMLDemo( openAI: openAI, imageValue: DiagramImageValue.data(imageData), delegate:delegate) :
try runTranslateDrawingToPlantUML( openAI: openAI, imageValue: DiagramImageValue.data(imageData), delegate:delegate);


if let content = try await runTranslation {
Expand Down
23 changes: 15 additions & 8 deletions PlantUML/PlantUMLDrawingView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,11 @@ extension PlantUMLDrawingView : AgentExecutorDelegate {
let image = image().withBackground(color: backgroundColor)

if let imageData = image.pngData() {

if SAVE_DRAWING_IMAGE {
saveData(imageData, toFile: "image.png", inDirectory: .picturesDirectory)
}

let base64Image = imageData.base64EncodedString()

processing.toggle()
isUseDrawingTool = false
Expand All @@ -146,16 +145,24 @@ extension PlantUMLDrawingView : AgentExecutorDelegate {
document.drawing = canvas.drawing.dataRepresentation()
dismiss()
}

if let content = await service.processImageWithAgents( imageUrl: "data:image/png;base64,\(base64Image)", delegate: self ) {

document.text = content
if let content = await service.processImageWithAgents( imageData: imageData, delegate: self ) {
document.text = content

}

// let base64Image = imageData.base64EncodedString()
//
// if let content = await service.processImageWithAgents( imageUrl: "data:image/png;base64,\(base64Image)", delegate: self ) {
//
// document.text = content
//
// }

}

}

}

}
Expand Down
52 changes: 20 additions & 32 deletions PlantUML/Settings.bundle/Root.plist
Original file line number Diff line number Diff line change
Expand Up @@ -10,36 +10,6 @@
<key>Type</key>
<string>PSGroupSpecifier</string>
</dict>
<!--
<dict>
<key>Type</key>
<string>PSTitleValueSpecifier</string>
<key>Title</key>
<string>Font Size</string>
<key>Key</key>
<string>fontSize</string>
<key>DefaultValue</key>
<string>20</string>
</dict>
<dict>
<key>Type</key>
<string>PSSliderSpecifier</string>
<key>Key</key>
<string>fontSize</string>
<key>DefaultValue</key>
<integer>20</integer>
<key>Title</key>
<string></string>
<key>MaximumValue</key>
<integer>30</integer>
<key>MaximumValueImage</key>
<string>textformat_size_larger.png</string>
<key>MinimumValue</key>
<integer>10</integer>
<key>MinimumValueImage</key>
<string>textformat_size_smaller.png</string>
</dict>
-->
<dict>
<key>DefaultValue</key>
<string>chrome</string>
Expand Down Expand Up @@ -236,11 +206,11 @@
<key>Key</key>
<string>openaiModel</string>
<key>Title</key>
<string>Model</string>
<string>Prompt Model</string>
<key>Titles</key>
<array>
<string>gpt-3.5-turbo</string>
<string>gpt-4</string>
<string>gpt-4o</string>
</array>
<key>Type</key>
<string>PSMultiValueSpecifier</string>
Expand All @@ -250,6 +220,24 @@
<string>gpt-4</string>
</array>
</dict>
<dict>
<key>DefaultValue</key>
<string>gpt-4o</string>
<key>Key</key>
<string>visionModel</string>
<key>Title</key>
<string>Vision Model</string>
<key>Titles</key>
<array>
<string>gpt-4o</string>
</array>
<key>Type</key>
<string>PSMultiValueSpecifier</string>
<key>Values</key>
<array>
<string>gpt-4o</string>
</array>
</dict>
</array>
</dict>
</plist>
Loading

0 comments on commit a2795d8

Please sign in to comment.