Commit

Merge pull request #504 from Ryan526/o1Models
Add o1 Models
Niek authored Oct 17, 2024
2 parents ed9e551 + 9a2565a commit a64337c
Showing 8 changed files with 24 additions and 11 deletions.
3 changes: 1 addition & 2 deletions src/lib/Chat.svelte
@@ -304,8 +304,7 @@
        chat,
        autoAddMessages: false,
        streaming: false,
-       summaryRequest: true,
-       maxTokens: 30
+       summaryRequest: true
      })
      try {
2 changes: 1 addition & 1 deletion src/lib/ChatRequest.svelte
@@ -191,7 +191,7 @@ export class ChatRequest {
      if (typeof setting.apiTransform === 'function') {
        value = setting.apiTransform(chatId, setting, value)
      }
-     if (key === 'max_tokens') {
+     if (key === 'max_completion_tokens') {
        if (opts.maxTokens) value = opts.maxTokens // only as large as requested
        if (value > maxAllowed || value < 1) value = null // if over max model, do not define max
        if (value) value = Math.floor(value)
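
For readers skimming the hunk above: the renamed `max_completion_tokens` branch keeps the same clamping behavior as before. A minimal standalone sketch of that logic — the wrapper function is hypothetical, only the three checks come from the diff:

function clampMaxCompletionTokens (
  requested: number | undefined, // opts.maxTokens: per-request override
  value: number, // current max_completion_tokens setting value
  maxAllowed: number // the model's max token buffer
): number | null {
  if (requested) value = requested // only as large as requested
  if (value > maxAllowed || value < 1) return null // out of range: leave undefined
  return Math.floor(value) // whole tokens only
}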
2 changes: 1 addition & 1 deletion src/lib/ChatSettingsModal.svelte
@@ -186,7 +186,7 @@
      const profileSelect = getChatSettingObjectByKey('profile') as ChatSetting & SettingSelect
      profileSelect.options = await getProfileSelect()
      chatDefaults.profile = await getDefaultProfileKey()
-     chatDefaults.max_tokens = getModelMaxTokens(chatSettings.model)
+     chatDefaults.max_completion_tokens = getModelMaxTokens(chatSettings.model)
      // const defaultProfile = globalStore.defaultProfile || profileSelect.options[0].value
      defaultProfile = await getDefaultProfileKey()
      isDefault = defaultProfile === chatSettings.profile
2 changes: 1 addition & 1 deletion src/lib/Profiles.svelte
@@ -236,7 +236,7 @@ The user has walked in on [[CHARACTER_NAME]]. They are on the bridge of the Hear
      profileName: 'Jen - An uninhibited virtual friend [LLaMA]',
      temperature: 0.8,
      top_p: 0.5,
-     max_tokens: 4096,
+     max_completion_tokens: 4096,
      profileDescription: "*** NOT for OpenAI / ChatGPT models ***\n\nA profile attempting a coherent, unrestricted, playful and helpful chat with Stable Beluga 2 and other LLaMA instruct (non-chat) models running on Petals. The goal is to have an assistant that can talk to you about any topic and answer any question without lecturing you and continuously reminding it can't do things because it's only an AI language model. If you have a better a profile, I'm sure @Niek wouldn't mind a pull request or issue opened.\n\nNote that chat with Llama 2 models under Petals can fall apart quickly, devolving into repetitive responses and catch-phrases. The repetitionPentalty settings helps with that, but then also can keep it from repeating layout patterns you're prompting for, so it can be a delicate balance.\n\nThis profile uses:\n- A system prompt designed for character simulation\n- Modified delimiters, etc., to try to keep chat cohearent\n- A summary prompt",
      continuousChat: 'summary',
      summaryThreshold: 0.8,
6 changes: 3 additions & 3 deletions src/lib/Settings.svelte
@@ -81,7 +81,7 @@ const gptDefaults = {
    n: 1,
    stream: true,
    stop: null,
-   max_tokens: 512,
+   max_completion_tokens: 512,
    presence_penalty: 0,
    frequency_penalty: 0,
    logit_bias: null,
@@ -496,11 +496,11 @@ const chatSettingsList: ChatSetting[] = [
      hide: hideModelSetting
    },
    {
-     key: 'max_tokens',
+     key: 'max_completion_tokens',
      name: 'Max Tokens',
      title: 'The maximum number of tokens to generate in the completion.\n' +
        '\n' +
-       'The token count of your prompt plus max_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n',
+       'The token count of your prompt plus max_completion_tokens cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).\n',
      min: 1,
      max: 32768,
      step: 1,
2 changes: 1 addition & 1 deletion src/lib/Types.svelte
@@ -52,7 +52,7 @@ export type Request = {
    n?: number;
    stream?: boolean;
    stop?: string | null;
-   max_tokens?: number;
+   max_completion_tokens?: number;
    presence_penalty?: number;
    frequency_penalty?: number;
    logit_bias?: Record<string, number> | null;
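
With `Request` updated, callers pass the renamed field when building a payload. A minimal sketch, assuming the type also declares `model` and `messages` fields (they sit outside this hunk) and that the import path matches this module:

import type { Request } from './Types.svelte'

// Hypothetical payload; the model id and message are placeholders.
const req: Request = {
  model: 'o1-mini',
  messages: [{ role: 'user', content: 'Hello' }],
  max_completion_tokens: 512, // replaces the old max_tokens field
  stream: false
}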
16 changes: 15 additions & 1 deletion src/lib/providers/openai/models.svelte
@@ -87,6 +87,18 @@ const gpt4omini = {
    completion: 0.00000060, // $0.00060 per 1000 tokens completion
    max: 131072 // 128k max token buffer
  }
+ const o1preview = {
+   ...chatModelBase,
+   prompt: 0.000015, // $0.015 per 1000 tokens prompt
+   completion: 0.00006, // $0.06 per 1000 tokens completion
+   max: 131072 // 128k max token buffer
+ }
+ const o1mini = {
+   ...chatModelBase,
+   prompt: 0.000003, // $0.003 per 1000 tokens prompt
+   completion: 0.000012, // $0.012 per 1000 tokens completion
+   max: 131072 // 128k max token buffer
+ }
  const gpt432k = {
    ...chatModelBase,
    prompt: 0.00006, // $0.06 per 1000 tokens prompt
@@ -129,7 +141,9 @@ export const chatModels : Record<string, ModelDetail> = {
    'gpt-4-0125-preview': { ...gpt4128kpreview },
    'gpt-4-32k': { ...gpt432k },
    'gpt-4-32k-0314': { ...gpt432k },
-   'gpt-4-32k-0613': { ...gpt432k }
+   'gpt-4-32k-0613': { ...gpt432k },
+   'o1-preview': { ...o1preview },
+   'o1-mini': { ...o1mini }
  }

  export const fetchRemoteModels = async () => {
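
The `prompt` and `completion` fields above are per-token dollar rates, so `0.000015` is $0.015 per 1000 prompt tokens. A small illustrative helper (not part of this diff) showing how such rates turn into a cost estimate:

// The { prompt, completion } shape matches the model objects above.
const estimateCostUSD = (
  detail: { prompt: number, completion: number },
  promptTokens: number,
  completionTokens: number
): number => detail.prompt * promptTokens + detail.completion * completionTokens

// e.g. 1000 prompt + 500 completion tokens on o1-preview:
// 1000 * 0.000015 + 500 * 0.00006 = $0.045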
2 changes: 1 addition & 1 deletion src/lib/providers/petals/request.svelte
@@ -165,7 +165,7 @@ export const chatRequest = async (
      !chatSettings.holdSocket && ws.close()
    })

-   let maxLen = Math.min(opts.maxTokens || chatSettings.max_tokens || maxTokens, maxTokens)
+   let maxLen = Math.min(opts.maxTokens || chatSettings.max_completion_tokens || maxTokens, maxTokens)
    let midDel = ''
    for (let i = 0, l = delimiter.length; i < l; i++) {
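
Spelled out, the updated Petals line prefers the per-request override, then the chat setting, then the model limit, and never exceeds the model limit. A runnable restatement with the inputs made explicit (values are placeholders):

const optsMaxTokens: number | undefined = undefined // opts.maxTokens
const settingMax = 4096 // chatSettings.max_completion_tokens
const modelMax = 2048 // maxTokens: the model's limit

// First defined value wins, then cap at the model limit.
const maxLen = Math.min(optsMaxTokens || settingMax || modelMax, modelMax) // -> 2048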
