Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

test: add model parameter validation rules and persistence tests #3618

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions extensions/inference-nitro-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ function loadLLMModel(settings: any): Promise<Response> {
if (!settings?.ngl) {
settings.ngl = 100
}
log(`[CORTEX]::Debug: Loading model with params ${JSON.stringify(settings)}`)
log(`[CORTEX]:: Loading model with params ${JSON.stringify(settings)}`)
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: 'POST',
headers: {
Expand All @@ -239,7 +239,7 @@ function loadLLMModel(settings: any): Promise<Response> {
})
.then((res) => {
log(
`[CORTEX]::Debug: Load model success with response ${JSON.stringify(
`[CORTEX]:: Load model success with response ${JSON.stringify(
res
)}`
)
Expand All @@ -260,7 +260,7 @@ function loadLLMModel(settings: any): Promise<Response> {
async function validateModelStatus(modelId: string): Promise<void> {
// Send a GET request to the validation URL.
// Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
log(`[CORTEX]::Debug: Validating model ${modelId}`)
log(`[CORTEX]:: Validating model ${modelId}`)
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: 'POST',
body: JSON.stringify({
Expand All @@ -275,7 +275,7 @@ async function validateModelStatus(modelId: string): Promise<void> {
retryDelay: 300,
}).then(async (res: Response) => {
log(
`[CORTEX]::Debug: Validate model state with response ${JSON.stringify(
`[CORTEX]:: Validate model state with response ${JSON.stringify(
res.status
)}`
)
Expand All @@ -286,7 +286,7 @@ async function validateModelStatus(modelId: string): Promise<void> {
// Otherwise, return an object with an error message.
if (body.model_loaded) {
log(
`[CORTEX]::Debug: Validate model state success with response ${JSON.stringify(
`[CORTEX]:: Validate model state success with response ${JSON.stringify(
body
)}`
)
Expand All @@ -295,7 +295,7 @@ async function validateModelStatus(modelId: string): Promise<void> {
}
const errorBody = await res.text()
log(
`[CORTEX]::Debug: Validate model state failed with response ${errorBody} and status is ${JSON.stringify(
`[CORTEX]:: Validate model state failed with response ${errorBody} and status is ${JSON.stringify(
res.statusText
)}`
)
Expand All @@ -310,7 +310,7 @@ async function validateModelStatus(modelId: string): Promise<void> {
async function killSubprocess(): Promise<void> {
const controller = new AbortController()
setTimeout(() => controller.abort(), 5000)
log(`[CORTEX]::Debug: Request to kill cortex`)
log(`[CORTEX]:: Request to kill cortex`)

const killRequest = () => {
return fetch(NITRO_HTTP_KILL_URL, {
Expand All @@ -321,17 +321,17 @@ async function killSubprocess(): Promise<void> {
.then(() =>
tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
)
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.then(() => log(`[CORTEX]:: cortex process is terminated`))
.catch((err) => {
log(
`[CORTEX]::Debug: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
`[CORTEX]:: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
)
throw 'PORT_NOT_AVAILABLE'
})
}

if (subprocess?.pid && process.platform !== 'darwin') {
log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
log(`[CORTEX]:: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid
return new Promise((resolve, reject) => {
terminate(pid, function (err) {
Expand All @@ -341,7 +341,7 @@ async function killSubprocess(): Promise<void> {
} else {
tcpPortUsed
.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.then(() => log(`[CORTEX]:: cortex process is terminated`))
.then(() => resolve())
.catch(() => {
log(
Expand All @@ -362,7 +362,7 @@ async function killSubprocess(): Promise<void> {
* @returns A promise that resolves when the Nitro subprocess is started.
*/
function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
log(`[CORTEX]::Debug: Spawning cortex subprocess...`)
log(`[CORTEX]:: Spawning cortex subprocess...`)

return new Promise<void>(async (resolve, reject) => {
let executableOptions = executableNitroFile(
Expand All @@ -381,7 +381,7 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
// Execute the binary
log(
`[CORTEX]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
)
log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)

Expand Down Expand Up @@ -415,23 +415,23 @@ function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {

// Handle subprocess output
subprocess.stdout.on('data', (data: any) => {
log(`[CORTEX]::Debug: ${data}`)
log(`[CORTEX]:: ${data}`)
})

subprocess.stderr.on('data', (data: any) => {
log(`[CORTEX]::Error: ${data}`)
})

subprocess.on('close', (code: any) => {
log(`[CORTEX]::Debug: cortex exited with code: ${code}`)
log(`[CORTEX]:: cortex exited with code: ${code}`)
subprocess = undefined
reject(`child process exited with code ${code}`)
})

tcpPortUsed
.waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
.then(() => {
log(`[CORTEX]::Debug: cortex is ready`)
log(`[CORTEX]:: cortex is ready`)
resolve()
})
})
Expand Down
4 changes: 2 additions & 2 deletions extensions/tensorrt-llm-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ function unloadModel(): Promise<void> {
}

if (subprocess?.pid) {
log(`[CORTEX]::Debug: Killing PID ${subprocess.pid}`)
log(`[CORTEX]:: Killing PID ${subprocess.pid}`)
const pid = subprocess.pid
return new Promise((resolve, reject) => {
terminate(pid, function (err) {
Expand All @@ -107,7 +107,7 @@ function unloadModel(): Promise<void> {
return tcpPortUsed
.waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
.then(() => resolve())
.then(() => log(`[CORTEX]::Debug: cortex process is terminated`))
.then(() => log(`[CORTEX]:: cortex process is terminated`))
.catch(() => {
killRequest()
})
Expand Down
4 changes: 2 additions & 2 deletions web/containers/Providers/EventHandler.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import { ulid } from 'ulidx'

import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'

import { toRuntimeParams } from '@/utils/modelParam'
import { extractInferenceParams } from '@/utils/modelParam'

import { extensionManager } from '@/extension'
import {
Expand Down Expand Up @@ -256,7 +256,7 @@ export default function EventHandler({ children }: { children: ReactNode }) {
},
]

const runtimeParams = toRuntimeParams(activeModelParamsRef.current)
const runtimeParams = extractInferenceParams(activeModelParamsRef.current)

const messageRequest: MessageRequest = {
id: msgId,
Expand Down
24 changes: 13 additions & 11 deletions web/containers/SliderRightPanel/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -87,26 +87,28 @@ const SliderRightPanel = ({
onValueChanged?.(Number(min))
setVal(min.toString())
setShowTooltip({ max: false, min: true })
} else {
setVal(Number(e.target.value).toString()) // There is a case .5 but not 0.5
}
}}
onChange={(e) => {
// TODO: How to support negative number input?
// Passthru since it validates again onBlur
if (/^\d*\.?\d*$/.test(e.target.value)) {
setVal(e.target.value)
}

// Should not accept invalid value or NaN
// E.g. anything changes that trigger onValueChanged
// Which is incorrect
if (Number(e.target.value) > Number(max)) {
setVal(max.toString())
} else if (
if (
Number(e.target.value) > Number(max) ||
Number(e.target.value) < Number(min) ||
!e.target.value.length
Number.isNaN(Number(e.target.value))
) {
setVal(min.toString())
} else if (Number.isNaN(Number(e.target.value))) return

onValueChanged?.(Number(e.target.value))
// TODO: How to support negative number input?
if (/^\d*\.?\d*$/.test(e.target.value)) {
setVal(e.target.value)
return
}
onValueChanged?.(Number(e.target.value))
}}
/>
}
Expand Down
9 changes: 6 additions & 3 deletions web/hooks/useSendChatMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ import {
import { Stack } from '@/utils/Stack'
import { compressImage, getBase64 } from '@/utils/base64'
import { MessageRequestBuilder } from '@/utils/messageRequestBuilder'
import { toRuntimeParams, toSettingParams } from '@/utils/modelParam'
import {
extractInferenceParams,
extractModelLoadParams,
} from '@/utils/modelParam'

import { ThreadMessageBuilder } from '@/utils/threadMessageBuilder'

Expand Down Expand Up @@ -189,8 +192,8 @@ export default function useSendChatMessage() {

if (engineParamsUpdate) setReloadModel(true)

const runtimeParams = toRuntimeParams(activeModelParams)
const settingParams = toSettingParams(activeModelParams)
const runtimeParams = extractInferenceParams(activeModelParams)
const settingParams = extractModelLoadParams(activeModelParams)

const prompt = message.trim()

Expand Down
Loading
Loading