Skip to content

Commit

Permalink
Merge pull request #115 from rawnly/main
Browse files Browse the repository at this point in the history
New Models and Enhancements
  • Loading branch information
ingvarus-bc authored Nov 17, 2023
2 parents 2835637 + 1a08386 commit 9e70523
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 13 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -742,12 +742,16 @@ Models are represented as a typealias `typealias Model = String`.

```swift
public extension Model {
static let gpt4_1106_preview = "gpt-4-1106-preview"
static let gpt4_vision_preview = "gpt-4-vision-preview"
static let gpt4 = "gpt-4"
static let gpt4_0314 = "gpt-4-0314"
static let gpt4_32k = "gpt-4-32k"
static let gpt4_32k_0314 = "gpt-4-32k-0314"
static let gpt3_5Turbo = "gpt-3.5-turbo"
static let gpt3_5Turbo_1106 = "gpt-3.5-turbo-1106"
static let gpt3_5Turbo0301 = "gpt-3.5-turbo-0301"


static let textDavinci_003 = "text-davinci-003"
static let textDavinci_002 = "text-davinci-002"
Expand All @@ -773,6 +777,9 @@ public extension Model {
static let textModerationStable = "text-moderation-stable"
static let textModerationLatest = "text-moderation-latest"
static let moderation = "text-moderation-001"

static let dall_e_2 = "dall-e-2"
static let dall_e_3 = "dall-e-3"
}
```

Expand Down
16 changes: 14 additions & 2 deletions Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,34 @@

import Foundation

/// The `response_format` values accepted by the audio transcription and
/// translation endpoints. The raw value is sent over the wire, so
/// `verboseJson` is mapped explicitly to the API's `verbose_json` spelling.
public enum AudioResponseFormat: String, Codable, Equatable {
case json
case text
case verboseJson = "verbose_json"
case srt
case vtt
}

/// A request body for the audio transcription (speech-to-text) endpoint.
public struct AudioTranscriptionQuery: Codable, Equatable {
    public typealias ResponseFormat = AudioResponseFormat

    /// The audio file content to transcribe.
    public let file: Data
    /// Name of the uploaded file (used as the multipart form-data file name).
    public let fileName: String
    /// ID of the model to use (e.g. `whisper-1`).
    public let model: Model
    /// The format of the transcript output; `nil` lets the server apply its default.
    public let responseFormat: Self.ResponseFormat?

    /// Optional text to guide the model's style or continue a previous audio segment.
    public let prompt: String?
    /// Sampling temperature — assumed to be the API's 0...1 range, TODO confirm.
    public let temperature: Double?
    /// The language of the input audio — presumably an ISO-639-1 code; verify against callers.
    public let language: String?

    public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, language: String? = nil, responseFormat: Self.ResponseFormat? = nil) {
        self.file = file
        self.fileName = fileName
        self.model = model
        self.prompt = prompt
        self.temperature = temperature
        self.language = language
        self.responseFormat = responseFormat
    }
}

Expand All @@ -35,7 +46,8 @@ extension AudioTranscriptionQuery: MultipartFormDataBodyEncodable {
.string(paramName: "model", value: model),
.string(paramName: "prompt", value: prompt),
.string(paramName: "temperature", value: temperature),
.string(paramName: "language", value: language)
.string(paramName: "language", value: language),
.string(paramName: "response_format", value: responseFormat)
])
return bodyBuilder.build()
}
Expand Down
6 changes: 5 additions & 1 deletion Sources/OpenAI/Public/Models/AudioTranslationQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,23 @@
import Foundation

/// A request body for the audio translation (speech-to-English-text) endpoint.
public struct AudioTranslationQuery: Codable, Equatable {
    public typealias ResponseFormat = AudioResponseFormat

    /// The audio file content to translate.
    public let file: Data
    /// Name of the uploaded file (used as the multipart form-data file name).
    public let fileName: String
    /// ID of the model to use (e.g. `whisper-1`).
    public let model: Model

    /// The format of the translated output; `nil` lets the server apply its default.
    public let responseFormat: Self.ResponseFormat?
    /// Optional text to guide the model's style or continue a previous audio segment.
    public let prompt: String?
    /// Sampling temperature — assumed to be the API's 0...1 range, TODO confirm.
    public let temperature: Double?

    public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, responseFormat: Self.ResponseFormat? = nil) {
        self.file = file
        self.fileName = fileName
        self.model = model
        self.prompt = prompt
        self.temperature = temperature
        self.responseFormat = responseFormat
    }
}

Expand All @@ -32,6 +35,7 @@ extension AudioTranslationQuery: MultipartFormDataBodyEncodable {
.file(paramName: "file", fileName: fileName, fileData: file, contentType: "audio/mpeg"),
.string(paramName: "model", value: model),
.string(paramName: "prompt", value: prompt),
.string(paramName: "response_format", value: responseFormat),
.string(paramName: "temperature", value: temperature)
])
return bodyBuilder.build()
Expand Down
20 changes: 19 additions & 1 deletion Sources/OpenAI/Public/Models/ChatQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@

import Foundation

/// Specifies the output format the model must produce (e.g. JSON mode).
/// See https://platform.openai.com/docs/guides/text-generation/json-mode
public struct ResponseFormat: Codable, Equatable {

    /// The wire values accepted by the `response_format.type` field.
    public enum ResponseFormatType: String, Codable, Equatable {
        case jsonObject = "json_object"
        case text
    }

    /// The requested output format.
    public let type: Self.ResponseFormatType

    /// Convenience value requesting JSON-mode output.
    public static let jsonObject = ResponseFormat(type: .jsonObject)
    /// Convenience value requesting plain-text output.
    public static let text = ResponseFormat(type: .text)
}

public struct Chat: Codable, Equatable {
public let role: Role
/// The contents of the message. `content` is required for all messages except assistant messages with function calls.
Expand Down Expand Up @@ -68,6 +81,7 @@ public struct ChatFunctionCall: Codable, Equatable {
}
}


/// See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.
public struct JSONSchema: Codable, Equatable {
public let type: JSONType
Expand Down Expand Up @@ -211,6 +225,8 @@ public struct ChatQueryFunctionCall: Codable, Equatable {
public struct ChatQuery: Equatable, Codable, Streamable {
/// ID of the model to use. Currently, only gpt-3.5-turbo and gpt-3.5-turbo-0301 are supported.
public let model: Model
/// An object specifying the format that the model must output.
public let responseFormat: ResponseFormat?
/// The messages to generate chat completions for
public let messages: [Chat]
/// A list of functions the model may generate JSON inputs for.
Expand Down Expand Up @@ -279,16 +295,18 @@ public struct ChatQuery: Equatable, Codable, Streamable {
case frequencyPenalty = "frequency_penalty"
case logitBias = "logit_bias"
case user
case responseFormat = "response_format"
}

public init(model: Model, messages: [Chat], functions: [ChatFunctionDeclaration]? = nil, functionCall: FunctionCall? = nil, temperature: Double? = nil, topP: Double? = nil, n: Int? = nil, stop: [String]? = nil, maxTokens: Int? = nil, presencePenalty: Double? = nil, frequencyPenalty: Double? = nil, logitBias: [String : Int]? = nil, user: String? = nil, stream: Bool = false) {
public init(model: Model, messages: [Chat], responseFormat: ResponseFormat? = nil, functions: [ChatFunctionDeclaration]? = nil, functionCall: FunctionCall? = nil, temperature: Double? = nil, topP: Double? = nil, n: Int? = nil, stop: [String]? = nil, maxTokens: Int? = nil, presencePenalty: Double? = nil, frequencyPenalty: Double? = nil, logitBias: [String : Int]? = nil, user: String? = nil, stream: Bool = false) {
self.model = model
self.messages = messages
self.functions = functions
self.functionCall = functionCall
self.temperature = temperature
self.topP = topP
self.n = n
self.responseFormat = responseFormat
self.stop = stop
self.maxTokens = maxTokens
self.presencePenalty = presencePenalty
Expand Down
33 changes: 32 additions & 1 deletion Sources/OpenAI/Public/Models/ImagesQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,48 @@

import Foundation


/// The `response_format` values accepted by the image endpoints:
/// `url` returns hosted image URLs, `b64_json` returns base64-encoded image data.
public enum ImageResponseFormat: String, Codable, Equatable {
case url
case b64_json
}

/// A request body for the image generation endpoint.
public struct ImagesQuery: Codable {
    public typealias ResponseFormat = ImageResponseFormat

    /// A text description of the desired image(s). The maximum length is 1000 characters.
    public let prompt: String

    /// ID of the model to use.
    public let model: Model?
    /// The format in which the generated images are returned.
    public let responseFormat: Self.ResponseFormat?
    /// The number of images to generate. Must be between 1 and 10.
    public let n: Int?
    /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024.
    public let size: String?
    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
    public let user: String?
    /// The style of the generated images. Must be one of vivid or natural. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This param is only supported for dall-e-3.
    public let style: String?

    public init(prompt: String, model: Model? = nil, responseFormat: Self.ResponseFormat? = nil, n: Int?, size: String?, style: String? = nil, user: String? = nil) {
        self.style = style
        self.prompt = prompt
        self.n = n
        self.size = size
        self.model = model
        self.responseFormat = responseFormat
        self.user = user
    }

    /// Maps `responseFormat` to the API's snake_case `response_format` key.
    public enum CodingKeys: String, CodingKey {
        case model
        case prompt
        case n
        case size
        case user
        case style
        case responseFormat = "response_format"
    }
}
4 changes: 3 additions & 1 deletion Sources/OpenAI/Public/Models/ImagesResult.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ import Foundation
/// The response returned by the image endpoints (generation, edits, variations).
public struct ImagesResult: Codable, Equatable {

    /// One generated image. Depending on the requested `response_format`,
    /// either `url` or `b64_json` is expected to be populated.
    public struct URLResult: Codable, Equatable {
        /// URL of the generated image (when the response format is `url`).
        public let url: String?
        /// Base64-encoded image data (when the response format is `b64_json`).
        public let b64_json: String?
    }

    /// Creation timestamp reported by the server (Unix epoch seconds).
    public let created: TimeInterval
    /// The generated images.
    public let data: [URLResult]
}
Expand Down
18 changes: 18 additions & 0 deletions Sources/OpenAI/Public/Models/Models/Models.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ public extension Model {

/// More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. Will be updated with our latest model iteration 2 weeks after it is released.
static let gpt4 = "gpt-4"

/// GPT-4 Turbo, the latest gpt-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling and more.
/// Maximum of 4096 output tokens
static let gpt4_1106_preview = "gpt-4-1106-preview"

/// Ability to understand images, in addition to all other GPT-4 Turbo capabilities.
static let gpt4_vision_preview = "gpt-4-vision-preview"

/// Snapshot of gpt-4 from March 14th 2023. Unlike gpt-4, this model will not receive updates, and will only be supported for a three month period ending on June 14th 2023.
@available(*, deprecated, message: "Please upgrade to the newer model")
static let gpt4_0314 = "gpt-4-0314"
Expand All @@ -22,15 +30,21 @@ public extension Model {
/// Same capabilities as the base gpt-4 mode but with 4x the context length. Will be updated with our latest model iteration.
static let gpt4_32k = "gpt-4-32k"
/// Snapshot of gpt-4-32 from March 14th 2023. Unlike gpt-4-32k, this model will not receive updates, and will only be supported for a three month period ending on June 14th 2023.
@available(*, deprecated, message: "Please upgrade to the newer model")
static let gpt4_32k_0314 = "gpt-4-32k-0314"
/// Snapshot of gpt-4-32 from June 13th 2023. Unlike gpt-4-32k, this model will not receive updates, and will be deprecated 3 months after a new version is released.
static let gpt4_32k_0613 = "gpt-4-32k-0613"

/// The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling and more.
static let gpt3_5Turbo_1106 = "gpt-3.5-turbo-1106"

/// Most capable GPT-3.5 model and optimized for chat at 1/10th the cost of text-davinci-003. Will be updated with our latest model iteration.
static let gpt3_5Turbo = "gpt-3.5-turbo"
/// Snapshot of gpt-3.5-turbo from March 1st 2023. Unlike gpt-3.5-turbo, this model will not receive updates, and will only be supported for a three month period ending on June 1st 2023.
@available(*, deprecated, message: "Please upgrade to the newer model")
static let gpt3_5Turbo0301 = "gpt-3.5-turbo-0301"
/// Snapshot of gpt-3.5-turbo from June 13th 2023 with function calling data. Unlike gpt-3.5-turbo, this model will not receive updates, and will be deprecated 3 months after a new version is released.
@available(*, deprecated, message: "Please upgrade to the newer model")
static let gpt3_5Turbo0613 = "gpt-3.5-turbo-0613"
/// Same capabilities as the standard gpt-3.5-turbo model but with 4 times the context.
static let gpt3_5Turbo_16k = "gpt-3.5-turbo-16k"
Expand Down Expand Up @@ -58,6 +72,10 @@ public extension Model {
// Transcriptions / Translations

static let whisper_1 = "whisper-1"

// Image Generation
static let dall_e_2 = "dall-e-2"
static let dall_e_3 = "dall-e-3"

// Fine Tunes

Expand Down
10 changes: 5 additions & 5 deletions Tests/OpenAITests/OpenAITests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ class OpenAITests: XCTestCase {
}

func testImages() async throws {
let query = ImagesQuery(prompt: "White cat with heterochromia sitting on the kitchen table", n: 1, size: "1024x1024")
let query = ImagesQuery(prompt: "White cat with heterochromia sitting on the kitchen table", model: .dall_e_2, n: 1, size: "1024x1024")
let imagesResult = ImagesResult(created: 100, data: [
.init(url: "http://foo.bar")
.init(url: "http://foo.bar", b64_json: nil)
])
try self.stub(result: imagesResult)
let result = try await openAI.images(query: query)
Expand All @@ -65,7 +65,7 @@ class OpenAITests: XCTestCase {
func testImageEdit() async throws {
let query = ImageEditsQuery(image: Data(), fileName: "whitecat.png", prompt: "White cat with heterochromia sitting on the kitchen table with a bowl of food", n: 1, size: "1024x1024")
let imagesResult = ImagesResult(created: 100, data: [
.init(url: "http://foo.bar")
.init(url: "http://foo.bar", b64_json: nil)
])
try self.stub(result: imagesResult)
let result = try await openAI.imageEdits(query: query)
Expand All @@ -84,7 +84,7 @@ class OpenAITests: XCTestCase {
func testImageVariation() async throws {
let query = ImageVariationsQuery(image: Data(), fileName: "whitecat.png", n: 1, size: "1024x1024")
let imagesResult = ImagesResult(created: 100, data: [
.init(url: "http://foo.bar")
.init(url: "http://foo.bar", b64_json: nil)
])
try self.stub(result: imagesResult)
let result = try await openAI.imageVariations(query: query)
Expand Down Expand Up @@ -117,7 +117,7 @@ class OpenAITests: XCTestCase {
}

func testChatsFunction() async throws {
let query = ChatQuery(model: .gpt3_5Turbo0613, messages: [
let query = ChatQuery(model: .gpt3_5Turbo_1106, messages: [
.init(role: .system, content: "You are Weather-GPT. You know everything about the weather."),
.init(role: .user, content: "What's the weather like in Boston?"),
], functions: [
Expand Down
42 changes: 40 additions & 2 deletions Tests/OpenAITests/OpenAITestsDecoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,18 @@ class OpenAITestsDecoder: XCTestCase {
},
{
"url": "https://bar.foo"
},
{
"b64_json": "test"
}
]
}
"""

let expectedValue = ImagesResult(created: 1589478378, data: [
.init(url: "https://foo.bar"),
.init(url: "https://bar.foo")
.init(url: "https://foo.bar", b64_json: nil),
.init(url: "https://bar.foo", b64_json: nil),
.init(url: nil, b64_json: "test")
])
try decode(data, expectedValue)
}
Expand Down Expand Up @@ -106,13 +110,44 @@ class OpenAITestsDecoder: XCTestCase {
], usage: .init(promptTokens: 9, completionTokens: 12, totalTokens: 21))
try decode(data, expectedValue)
}

/// Verifies that `ImagesQuery` serializes to the expected JSON payload,
/// including the snake_case `response_format` key produced by its CodingKeys.
func testImageQuery() async throws {
let imageQuery = ImagesQuery(
prompt: "test",
model: .dall_e_2,
responseFormat: .b64_json,
n: 1,
size: "10",
style: "vivid",
user: "user"
)

let expectedValue = """
{
"model": "dall-e-2",
"prompt": "test",
"n": 1,
"size": "10",
"style": "vivid",
"user": "user",
"response_format": "b64_json"
}
"""

// To compare serialized JSONs we first convert them both into NSDictionary which are comparable (unlike native Swift dictionaries)
let imageQueryAsDict = try jsonDataAsNSDictionary(JSONEncoder().encode(imageQuery))
let expectedValueAsDict = try jsonDataAsNSDictionary(expectedValue.data(using: .utf8)!)

XCTAssertEqual(imageQueryAsDict, expectedValueAsDict)
}

func testChatQueryWithFunctionCall() async throws {
let chatQuery = ChatQuery(
model: .gpt3_5Turbo,
messages: [
Chat(role: .user, content: "What's the weather like in Boston?")
],
responseFormat: .init(type: .jsonObject),
functions: [
ChatFunctionDeclaration(
name: "get_current_weather",
Expand All @@ -135,6 +170,9 @@ class OpenAITestsDecoder: XCTestCase {
"messages": [
{ "role": "user", "content": "What's the weather like in Boston?" }
],
"response_format": {
"type": "json_object"
},
"functions": [
{
"name": "get_current_weather",
Expand Down

0 comments on commit 9e70523

Please sign in to comment.