Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Models and Enhancements #115

Merged
merged 14 commits into from
Nov 17, 2023
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -662,12 +662,16 @@ Models are represented as a typealias `typealias Model = String`.

```swift
public extension Model {
static let gpt4_1106_preview = "gpt-4-1106-preview"
static let gpt4_vision_preview = "gpt-4-vision-preview"
static let gpt4 = "gpt-4"
static let gpt4_0314 = "gpt-4-0314"
static let gpt4_32k = "gpt-4-32k"
static let gpt4_32k_0314 = "gpt-4-32k-0314"
static let gpt3_5Turbo = "gpt-3.5-turbo"
static let gpt3_5Turbo_1106 = "gpt-3.5-turbo-1106"
static let gpt3_5Turbo0301 = "gpt-3.5-turbo-0301"


static let textDavinci_003 = "text-davinci-003"
static let textDavinci_002 = "text-davinci-002"
Expand All @@ -693,6 +697,9 @@ public extension Model {
static let textModerationStable = "text-moderation-stable"
static let textModerationLatest = "text-moderation-latest"
static let moderation = "text-moderation-001"

static let dall_e_2 = "dall-e-2"
static let dall_e_3 = "dall-e-3"
rawnly marked this conversation as resolved.
Show resolved Hide resolved
}
```

Expand Down
16 changes: 14 additions & 2 deletions Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,34 @@

import Foundation

/// The output format for audio transcription / translation results,
/// mirroring the `response_format` values accepted by the OpenAI audio endpoints.
public enum AudioResponseFormat: String, Codable, Equatable {
    /// A plain JSON payload.
    case json = "json"
    /// Raw text with no wrapping structure.
    case text = "text"
    /// A JSON payload that also carries segment/timestamp metadata.
    case verboseJson = "verbose_json"
    /// SubRip subtitle format.
    case srt = "srt"
    /// WebVTT subtitle format.
    case vtt = "vtt"
}

public struct AudioTranscriptionQuery: Codable, Equatable {
public typealias ResponseFormat = AudioResponseFormat

public let file: Data
public let fileName: String
public let model: Model
public let responseFormat: Self.ResponseFormat?

public let prompt: String?
public let temperature: Double?
public let language: String?

public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, language: String? = nil) {
public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, language: String? = nil, responseFormat: Self.ResponseFormat? = nil) {
self.file = file
self.fileName = fileName
self.model = model
self.prompt = prompt
self.temperature = temperature
self.language = language
self.responseFormat = responseFormat
}
}

Expand All @@ -35,7 +46,8 @@ extension AudioTranscriptionQuery: MultipartFormDataBodyEncodable {
.string(paramName: "model", value: model),
.string(paramName: "prompt", value: prompt),
.string(paramName: "temperature", value: temperature),
.string(paramName: "language", value: language)
.string(paramName: "language", value: language),
.string(paramName: "response_format", value: responseFormat)
])
return bodyBuilder.build()
}
Expand Down
6 changes: 5 additions & 1 deletion Sources/OpenAI/Public/Models/AudioTranslationQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,23 @@
import Foundation

public struct AudioTranslationQuery: Codable, Equatable {
public typealias ResponseFormat = AudioResponseFormat

public let file: Data
public let fileName: String
public let model: Model

public let responseFormat: Self.ResponseFormat?
public let prompt: String?
public let temperature: Double?

public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil) {
public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, responseFormat: Self.ResponseFormat? = nil) {
self.file = file
self.fileName = fileName
self.model = model
self.prompt = prompt
self.temperature = temperature
self.responseFormat = responseFormat
}
}

Expand All @@ -32,6 +35,7 @@ extension AudioTranslationQuery: MultipartFormDataBodyEncodable {
.file(paramName: "file", fileName: fileName, fileData: file, contentType: "audio/mpeg"),
.string(paramName: "model", value: model),
.string(paramName: "prompt", value: prompt),
.string(paramName: "response_format", value: responseFormat),
.string(paramName: "temperature", value: temperature)
])
return bodyBuilder.build()
Expand Down
20 changes: 19 additions & 1 deletion Sources/OpenAI/Public/Models/ChatQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@

import Foundation

/// An object specifying the format that the model must output.
///
/// Use `.jsonObject` to enable JSON mode.
/// See https://platform.openai.com/docs/guides/text-generation/json-mode
public struct ResponseFormat: Codable, Equatable {
    /// Convenience preset for `{"type": "json_object"}` (JSON mode).
    public static let jsonObject = ResponseFormat(type: .jsonObject)
    /// Convenience preset for `{"type": "text"}`.
    public static let text = ResponseFormat(type: .text)

    /// The discriminator encoded as the `type` field.
    public let type: Self.ResponseFormatType

    /// Explicit public initializer. Without it the synthesized memberwise
    /// initializer is `internal`, so consumers of the library could only use
    /// the two static presets and never construct a value themselves.
    public init(type: Self.ResponseFormatType) {
        self.type = type
    }

    public enum ResponseFormatType: String, Codable, Equatable {
        case jsonObject = "json_object"
        case text
    }
}

public struct Chat: Codable, Equatable {
public let role: Role
/// The contents of the message. `content` is required for all messages except assistant messages with function calls.
Expand Down Expand Up @@ -68,6 +81,7 @@ public struct ChatFunctionCall: Codable, Equatable {
}
}


/// See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.
public struct JSONSchema: Codable, Equatable {
public let type: JSONType
Expand Down Expand Up @@ -211,6 +225,8 @@ public struct ChatQueryFunctionCall: Codable, Equatable {
public struct ChatQuery: Equatable, Codable, Streamable {
/// ID of the model to use. Currently, only gpt-3.5-turbo and gpt-3.5-turbo-0301 are supported.
public let model: Model
/// An object specifying the format that the model must output.
public let responseFormat: ResponseFormat?
/// The messages to generate chat completions for
public let messages: [Chat]
/// A list of functions the model may generate JSON inputs for.
Expand Down Expand Up @@ -279,16 +295,18 @@ public struct ChatQuery: Equatable, Codable, Streamable {
case frequencyPenalty = "frequency_penalty"
case logitBias = "logit_bias"
case user
case responseFormat = "response_format"
}

public init(model: Model, messages: [Chat], functions: [ChatFunctionDeclaration]? = nil, functionCall: FunctionCall? = nil, temperature: Double? = nil, topP: Double? = nil, n: Int? = nil, stop: [String]? = nil, maxTokens: Int? = nil, presencePenalty: Double? = nil, frequencyPenalty: Double? = nil, logitBias: [String : Int]? = nil, user: String? = nil, stream: Bool = false) {
public init(model: Model, messages: [Chat], responseFormat: ResponseFormat? = nil, functions: [ChatFunctionDeclaration]? = nil, functionCall: FunctionCall? = nil, temperature: Double? = nil, topP: Double? = nil, n: Int? = nil, stop: [String]? = nil, maxTokens: Int? = nil, presencePenalty: Double? = nil, frequencyPenalty: Double? = nil, logitBias: [String : Int]? = nil, user: String? = nil, stream: Bool = false) {
self.model = model
self.messages = messages
self.functions = functions
self.functionCall = functionCall
self.temperature = temperature
self.topP = topP
self.n = n
self.responseFormat = responseFormat
self.stop = stop
self.maxTokens = maxTokens
self.presencePenalty = presencePenalty
Expand Down
33 changes: 32 additions & 1 deletion Sources/OpenAI/Public/Models/ImagesQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,48 @@

import Foundation


/// How generated images are returned by the images endpoints:
/// as a hosted URL or as base64-encoded JSON data.
public enum ImageResponseFormat: String, Codable, Equatable {
    /// A URL pointing at the generated image.
    case url = "url"
    /// The image bytes embedded in the response, base64 encoded (`b64_json`).
    case b64_json = "b64_json"
}

public struct ImagesQuery: Codable {
public typealias ResponseFormat = ImageResponseFormat

/// A text description of the desired image(s). The maximum length is 1000 characters.
public let prompt: String

/// ID of the model to use.
public let model: Model?
/// The format in which the generated images are returned
public let responseFormat: Self.ResponseFormat?
/// The number of images to generate. Must be between 1 and 10.
public let n: Int?
/// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024.
public let size: String?
/// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
public let user: String?
/// The style of the generated images. Must be one of vivid or natural. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This param is only supported for dall-e-3.
public let style: String?

public init(prompt: String, n: Int?, size: String?) {
public init(prompt: String, model: Model?=nil, responseFormat: Self.ResponseFormat?=nil, n: Int?, size: String?, style: String?=nil, user:String?=nil) {
self.style = style
self.prompt = prompt
self.n = n
self.size = size
self.model = model
self.responseFormat = responseFormat
self.user = user
}

public enum CodingKeys: String, CodingKey {
case model
case prompt
case n
case size
case user
case style
case responseFormat = "response_format"
}
}
4 changes: 3 additions & 1 deletion Sources/OpenAI/Public/Models/ImagesResult.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ import Foundation
public struct ImagesResult: Codable, Equatable {

public struct URLResult: Codable, Equatable {
public let url: String
public let url: String?
public let b64_json: String?
}

public let created: TimeInterval
public let data: [URLResult]
}
18 changes: 18 additions & 0 deletions Sources/OpenAI/Public/Models/Models/Models.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ public extension Model {

/// More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. Will be updated with our latest model iteration 2 weeks after it is released.
static let gpt4 = "gpt-4"

/// GPT-4 Turbo, the latest gpt-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling and more.
/// Maximum of 4096 output tokens
static let gpt4_1106_preview = "gpt-4-1106-preview"

/// Ability to understand images, in addition to all other GPT-4 Turbo capabilities.
static let gpt4_vision_preview = "gpt-4-vision-preview"

/// Snapshot of gpt-4 from March 14th 2023. Unlike gpt-4, this model will not receive updates, and will only be supported for a three month period ending on June 14th 2023.
@available(*, deprecated, message: "Please upgrade to the newer model")
static let gpt4_0314 = "gpt-4-0314"
Expand All @@ -22,15 +30,21 @@ public extension Model {
/// Same capabilities as the base gpt-4 mode but with 4x the context length. Will be updated with our latest model iteration.
static let gpt4_32k = "gpt-4-32k"
/// Snapshot of gpt-4-32 from March 14th 2023. Unlike gpt-4-32k, this model will not receive updates, and will only be supported for a three month period ending on June 14th 2023.
@available(*, deprecated, message: "Please upgrade to the newer model")
static let gpt4_32k_0314 = "gpt-4-32k-0314"
/// Snapshot of gpt-4-32 from June 13th 2023. Unlike gpt-4-32k, this model will not receive updates, and will be deprecated 3 months after a new version is released.
static let gpt4_32k_0613 = "gpt-4-32k-0613"

/// The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling and more.
static let gpt3_5Turbo_1106 = "gpt-3.5-turbo-1106"

/// Most capable GPT-3.5 model and optimized for chat at 1/10th the cost of text-davinci-003. Will be updated with our latest model iteration.
static let gpt3_5Turbo = "gpt-3.5-turbo"
/// Snapshot of gpt-3.5-turbo from March 1st 2023. Unlike gpt-3.5-turbo, this model will not receive updates, and will only be supported for a three month period ending on June 1st 2023.
@available(*, deprecated, message: "Please upgrade to the newer model")
static let gpt3_5Turbo0301 = "gpt-3.5-turbo-0301"
/// Snapshot of gpt-3.5-turbo from June 13th 2023 with function calling data. Unlike gpt-3.5-turbo, this model will not receive updates, and will be deprecated 3 months after a new version is released.
@available(*, deprecated, message: "Please upgrade to the newer model")
static let gpt3_5Turbo0613 = "gpt-3.5-turbo-0613"
/// Same capabilities as the standard gpt-3.5-turbo model but with 4 times the context.
static let gpt3_5Turbo_16k = "gpt-3.5-turbo-16k"
Expand Down Expand Up @@ -58,6 +72,10 @@ public extension Model {
// Transcriptions / Translations

static let whisper_1 = "whisper-1"

// Image Generation
static let dall_e_2 = "dall-e-2"
static let dall_e_3 = "dall-e-3"

// Fine Tunes

Expand Down
6 changes: 3 additions & 3 deletions Tests/OpenAITests/OpenAITests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ class OpenAITests: XCTestCase {
}

func testImages() async throws {
let query = ImagesQuery(prompt: "White cat with heterochromia sitting on the kitchen table", n: 1, size: "1024x1024")
let query = ImagesQuery(prompt: "White cat with heterochromia sitting on the kitchen table", model: .dall_e_2, n: 1, size: "1024x1024")
let imagesResult = ImagesResult(created: 100, data: [
.init(url: "http://foo.bar")
.init(url: "http://foo.bar", b64_json: nil)
])
try self.stub(result: imagesResult)
let result = try await openAI.images(query: query)
Expand Down Expand Up @@ -79,7 +79,7 @@ class OpenAITests: XCTestCase {
}

func testChatsFunction() async throws {
let query = ChatQuery(model: .gpt3_5Turbo0613, messages: [
let query = ChatQuery(model: .gpt3_5Turbo_1106, messages: [
.init(role: .system, content: "You are Weather-GPT. You know everything about the weather."),
.init(role: .user, content: "What's the weather like in Boston?"),
], functions: [
Expand Down
42 changes: 40 additions & 2 deletions Tests/OpenAITests/OpenAITestsDecoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,18 @@ class OpenAITestsDecoder: XCTestCase {
},
{
"url": "https://bar.foo"
},
{
"b64_json": "test"
}
]
}
"""

let expectedValue = ImagesResult(created: 1589478378, data: [
.init(url: "https://foo.bar"),
.init(url: "https://bar.foo")
.init(url: "https://foo.bar", b64_json: nil),
.init(url: "https://bar.foo", b64_json: nil),
.init(url: nil, b64_json: "test")
])
try decode(data, expectedValue)
}
Expand Down Expand Up @@ -106,13 +110,44 @@ class OpenAITestsDecoder: XCTestCase {
], usage: .init(promptTokens: 9, completionTokens: 12, totalTokens: 21))
try decode(data, expectedValue)
}

/// Verifies that `ImagesQuery` serializes every field to the JSON shape the
/// images endpoint expects, including the snake_case `response_format` key.
func testImageQuery() async throws {
    let query = ImagesQuery(
        prompt: "test",
        model: .dall_e_2,
        responseFormat: .b64_json,
        n: 1,
        size: "10",
        style: "vivid",
        user: "user"
    )

    let expectedJSON = """
    {
        "model": "dall-e-2",
        "prompt": "test",
        "n": 1,
        "size": "10",
        "style": "vivid",
        "user": "user",
        "response_format": "b64_json"
    }
    """

    // Serialized JSON key order is unstable, so both sides are converted to
    // NSDictionary, whose equality is order-independent (unlike comparing the
    // raw encoded Data).
    let encodedDict = try jsonDataAsNSDictionary(JSONEncoder().encode(query))
    let expectedDict = try jsonDataAsNSDictionary(expectedJSON.data(using: .utf8)!)

    XCTAssertEqual(encodedDict, expectedDict)
}

func testChatQueryWithFunctionCall() async throws {
let chatQuery = ChatQuery(
model: .gpt3_5Turbo,
messages: [
Chat(role: .user, content: "What's the weather like in Boston?")
],
responseFormat: .init(type: .jsonObject),
functions: [
ChatFunctionDeclaration(
name: "get_current_weather",
Expand All @@ -135,6 +170,9 @@ class OpenAITestsDecoder: XCTestCase {
"messages": [
{ "role": "user", "content": "What's the weather like in Boston?" }
],
"response_format": {
"type": "json_object"
},
"functions": [
{
"name": "get_current_weather",
Expand Down