-
Notifications
You must be signed in to change notification settings - Fork 863
/
management.proto
122 lines (87 loc) · 4.56 KB
/
management.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// This schema is written in proto3 syntax.
syntax = "proto3";
// Protobuf package that namespaces the definitions declared in this file.
package org.pytorch.serve.grpc.management;
// Java codegen option: emit each top-level message/service in its own .java
// file instead of nesting them all inside a single outer class.
option java_multiple_files = true;
// Reply message used by the management API calls.
message ManagementResponse {
  // Response text; its content depends on which management API call
  // produced it.
  string msg = 1;
}
// Request to describe a model.
message DescribeModelRequest {
  // Required. Name of the model to describe.
  string model_name = 1;

  // Optional. Version of the model to describe.
  string model_version = 2;

  // Optional. Customized metadata flag.
  // NOTE(review): presumably selects whether customized metadata is included
  // in the description - confirm against the server implementation.
  bool customized = 3;
}
// Request to list models, one page at a time.
message ListModelsRequest {
  // Optional. Maximum number of items to return. When set, TorchServe returns
  // at most this many items, though it might return fewer. If provided, the
  // value must be between 1 and 1000, inclusive. When omitted, it defaults
  // to 100.
  int32 limit = 1;

  // Optional. Token used to retrieve the next set of results. TorchServe
  // provides the token when the response from a previous call has more
  // results than the maximum page size.
  int32 next_page_token = 2;
}
// Request to register a model.
message RegisterModelRequest {
  // Optional. Inference batch size; default: 1.
  int32 batch_size = 1;

  // Optional. Inference handler entry-point. Overrides the handler in
  // MANIFEST.json if present.
  string handler = 2;

  // Optional. Number of initial workers; default: 0.
  int32 initial_workers = 3;

  // Optional. Maximum delay for batch aggregation; default: 100.
  // NOTE(review): the unit is not stated in this file (presumably
  // milliseconds) - confirm.
  int32 max_batch_delay = 4;

  // Optional. Name of the model. Overrides modelName in MANIFEST.json if
  // present.
  string model_name = 5;

  // Optional. Maximum time, in seconds, that TorchServe waits for a response
  // from the model inference code; default: 120.
  int32 response_timeout = 6;

  // Optional. Runtime for the model's custom service code. Overrides the
  // runtime in MANIFEST.json if present.
  string runtime = 7;

  // Optional. Whether worker creation is synchronous; default: false.
  bool synchronous = 8;

  // Required. Model archive download URL; a local file or the HTTP(S)
  // protocol is supported.
  string url = 9;

  // Optional. Whether S3 SSE KMS is enabled; default: false.
  bool s3_sse_kms = 10;

  // Optional. Maximum time, in seconds, that TorchServe waits for a model to
  // start up; default: 120.
  int32 startup_timeout = 11;
}
// Request to scale the workers of a model.
message ScaleWorkerRequest {
  // Required. Name of the model whose workers are scaled.
  string model_name = 1;

  // Optional. Model version.
  string model_version = 2;

  // Optional. Maximum number of worker processes.
  int32 max_worker = 3;

  // Optional. Minimum number of worker processes.
  int32 min_worker = 4;

  // Optional. Number of GPU worker processes to create.
  int32 number_gpu = 5;

  // Optional. Whether the call is synchronous; default: false.
  bool synchronous = 6;

  // Optional. Wait time allowed, if necessary, for a worker to complete all
  // pending requests. Use 0 to terminate the backend worker process
  // immediately, or -1 to wait indefinitely.
  int32 timeout = 7;
}
// Request to change the default version of a model.
message SetDefaultRequest {
  // Required. Name of the model whose default version is to be updated.
  string model_name = 1;

  // Required. Version to set as the default version of the model.
  string model_version = 2;
}
// Request to unregister a model.
message UnregisterModelRequest {
  // Required. Name of the model to unregister.
  string model_name = 1;

  // Optional. Version of the model to unregister.
  // (The previous comment here repeated the model_name description.)
  string model_version = 2;
}
// gRPC service exposing the TorchServe model management operations.
service ManagementAPIsService {
  // Provides detailed information about the default version of a model.
  rpc DescribeModel(DescribeModelRequest) returns (ManagementResponse) {}

  // Lists the models registered in TorchServe.
  rpc ListModels(ListModelsRequest) returns (ManagementResponse) {}

  // Registers a new model in TorchServe.
  rpc RegisterModel(RegisterModelRequest) returns (ManagementResponse) {}

  // Configures the number of workers for the default version of a model.
  // This call is asynchronous by default; the caller needs to call
  // DescribeModel to check whether the model's workers have been changed.
  rpc ScaleWorker(ScaleWorkerRequest) returns (ManagementResponse) {}

  // Sets the default version of a model.
  rpc SetDefault(SetDefaultRequest) returns (ManagementResponse) {}

  // Unregisters the default version of a model from TorchServe if it is the
  // only version available. This call is asynchronous by default; the caller
  // can call ListModels to confirm that the model is unregistered.
  rpc UnregisterModel(UnregisterModelRequest) returns (ManagementResponse) {}
}