// frontend/server/src/main/resources/proto/management.proto

// This schema is written in proto3 syntax.
syntax = "proto3";

// Protobuf package that namespaces the management API definitions below.
package org.pytorch.serve.grpc.management;

// Generate each top-level message and service as its own Java file instead
// of nesting them all inside a single outer wrapper class.
option java_multiple_files = true;

// Reply message returned by every RPC of ManagementAPIsService.
message ManagementResponse {
    // Response text produced by the management API call.
    string msg = 1;
}

// Request message for the DescribeModel RPC.
message DescribeModelRequest {
    // Required. Name of the model to describe.
    string model_name = 1;

    // Optional. Version of the model to describe.
    string model_version = 2;

    // Optional. Flag concerning customized metadata.
    // NOTE(review): presumably asks for customized metadata to be included
    // in the description; confirm against the server implementation.
    bool customized = 3;
}

// Request message for the ListModels RPC; both fields control pagination.
message ListModelsRequest {
    // Optional. Maximum number of items to return. When this value is
    // present, TorchServe does not return more than the specified number of
    // items, but it might return fewer. A supplied value must be between 1
    // and 1000, inclusive. When no value is supplied, it defaults to 100.
    int32 limit = 1;

    // Optional. Token used to retrieve the next set of results. TorchServe
    // provides the token when the response from a previous call has more
    // results than the maximum page size.
    int32 next_page_token = 2;
}

// Request message for the RegisterModel RPC.
//
// NOTE(review): the defaults quoted below are the documented API defaults.
// proto3 leaves unset scalars at 0 / false / "", so presumably the server
// substitutes these defaults; confirm against the server implementation.
message RegisterModelRequest {
    // Optional. Inference batch size. Default: 1.
    int32 batch_size = 1;

    // Optional. Inference handler entry point. This value overrides the
    // handler in MANIFEST.json if present.
    string handler = 2;

    // Optional. Number of initial workers. Default: 0.
    int32 initial_workers = 3;

    // Optional. Maximum delay for batch aggregation. Default: 100.
    // NOTE(review): the unit is not stated here; presumably milliseconds,
    // confirm.
    int32 max_batch_delay = 4;

    // Optional. Name of the model. This value overrides modelName in
    // MANIFEST.json if present.
    string model_name = 5;

    // Optional. Maximum time, in seconds, that TorchServe waits for a
    // response from the model inference code. Default: 120.
    int32 response_timeout = 6;

    // Optional. Runtime for the model's custom service code. This value
    // overrides runtime in MANIFEST.json if present.
    string runtime = 7;

    // Optional. Whether worker creation is synchronous. Default: false.
    bool synchronous = 8;

    // Required. Download URL of the model archive; a local file or the
    // HTTP(S) protocol is supported.
    string url = 9;

    // Optional. Whether S3 SSE KMS is enabled. Default: false.
    bool s3_sse_kms = 10;

    // Optional. Maximum time, in seconds, that TorchServe waits for a model
    // to start up. Default: 120.
    int32 startup_timeout = 11;
}

// Request message for the ScaleWorker RPC.
message ScaleWorkerRequest {
    // Required. Name of the model whose workers are scaled.
    string model_name = 1;

    // Optional. Version of the model.
    string model_version = 2;

    // Optional. Maximum number of worker processes.
    int32 max_worker = 3;

    // Optional. Minimum number of worker processes.
    int32 min_worker = 4;

    // Optional. Number of GPU worker processes to create.
    int32 number_gpu = 5;

    // Optional. Whether the call is synchronous. Default: false.
    bool synchronous = 6;

    // Optional. Wait time allowed, if necessary, for a worker to complete
    // all pending requests. Use 0 to terminate the backend worker process
    // immediately. Use -1 to wait indefinitely.
    // NOTE(review): the unit of the wait time is not stated here; confirm.
    int32 timeout = 7;
}

// Request message for the SetDefault RPC.
message SetDefaultRequest {
    // Required. Name of the model whose default version is to be updated.
    string model_name = 1;

    // Required. Version to set as the default version of the model.
    string model_version = 2;
}

// Request message for the UnregisterModel RPC.
message UnregisterModelRequest {
    // Required. Name of the model to unregister.
    string model_name = 1;

    // Optional. Version of the model to unregister.
    string model_version = 2;
}

// gRPC service exposing the TorchServe model management operations. Every
// RPC replies with a ManagementResponse.
service ManagementAPIsService {
    // Returns detailed information about the default version of a model.
    rpc DescribeModel(DescribeModelRequest) returns (ManagementResponse);

    // Lists the models registered in TorchServe.
    rpc ListModels(ListModelsRequest) returns (ManagementResponse);

    // Registers a new model in TorchServe.
    rpc RegisterModel(RegisterModelRequest) returns (ManagementResponse);

    // Configures the number of workers for the default version of a model.
    // This call is asynchronous by default; the caller needs to call
    // DescribeModel to check whether the model's workers have been changed.
    rpc ScaleWorker(ScaleWorkerRequest) returns (ManagementResponse);

    // Sets the default version of a model.
    rpc SetDefault(SetDefaultRequest) returns (ManagementResponse);

    // Unregisters the default version of a model from TorchServe if it is
    // the only version available. This call is asynchronous by default; the
    // caller can call ListModels to confirm that the model is unregistered.
    rpc UnregisterModel(UnregisterModelRequest) returns (ManagementResponse);
}