-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
new grpc server and client for tuning inference framework, dtype, input length and batch size on LLMs from huggingface
Signed-off-by: ZHANGWENTAI <2092913428@qq.com>
- Loading branch information
1 parent
9fb1a10
commit 26ecbf8
Showing
30 changed files
with
1,526 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,3 +29,9 @@ vendor | |
|
||
# arch | ||
arch | ||
|
||
# model file cache | ||
.kubedl_model_cache/ | ||
|
||
# vllm wheel file | ||
*.whl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
syntax = "proto3"; | ||
|
||
package api.predict; | ||
|
||
option go_package = "../grpc_predict/go"; | ||
|
||
// Predictor is the prediction service: it exposes model inference as a
// single unary RPC.
service Predictor {
  // Predict performs model inference. It accepts one PredictRequest and
  // returns one PredictResponse.
  rpc Predict(PredictRequest) returns (PredictResponse);
}
|
||
// PredictRequest is the input message of Predictor.Predict.
message PredictRequest {
  // Input data; can be a serialized tensor or another format.
  bytes input_data = 1;

  // Additional metadata, as string keys mapped to string values.
  map<string, string> metadata = 2;
}
|
||
// PredictResponse is the output message of Predictor.Predict.
message PredictResponse {
  // Output data; can be a serialized tensor or another format.
  bytes output_data = 1;

  // Additional metadata, as string keys mapped to string values.
  map<string, string> metadata = 2;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash
#
# Regenerates the Go and Python bindings for predict.proto.
# All paths are relative, so run this from the directory that contains the
# proto file.

# Stop at the first failing command (and on unset variables) so that the
# completion message below is only printed when every generator succeeded.
set -euo pipefail

# Set the proto file name
export PROTO_FILE=predict.proto

# Generate Go code
# NOTE(review): only --go_out is passed, so this emits the Go message types
# but no client/server stubs for the Predictor service. If Go stubs are
# needed, add --go-grpc_out=. (requires protoc-gen-go-grpc) -- confirm.
protoc --go_out=. "$PROTO_FILE"

# Generate Python code
# protoc does not create a missing output directory, so make sure it exists.
mkdir -p python3
python3 -m grpc_tools.protoc -I. --python_out=python3 --grpc_python_out=python3 "$PROTO_FILE"

# Output completion information
echo "gRPC code generation completed for $PROTO_FILE"
49 changes: 49 additions & 0 deletions
49
api/v1alpha1/grpc_proto/grpc_predict/python3/predict_pb2.py
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
101 changes: 101 additions & 0 deletions
101
api/v1alpha1/grpc_proto/grpc_predict/python3/predict_pb2_grpc.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! | ||
"""Client and server classes corresponding to protobuf-defined services.""" | ||
import grpc | ||
import warnings | ||
|
||
import predict_pb2 as predict__pb2 | ||
|
||
# grpcio version these stubs were generated with, and the version installed.
GRPC_GENERATED_VERSION = '1.66.2'
GRPC_VERSION = grpc.__version__

# Refuse to load when the installed grpcio is older than the generator's.
try:
    from grpc._utilities import first_version_is_lower
    _version_not_supported = first_version_is_lower(
        GRPC_VERSION, GRPC_GENERATED_VERSION
    )
except ImportError:
    # The comparison helper could not be imported; treat as unsupported.
    _version_not_supported = True

if _version_not_supported:
    raise RuntimeError(
        f'The grpc package installed is at version {GRPC_VERSION},'
        f' but the generated code in predict_pb2_grpc.py depends on'
        f' grpcio>={GRPC_GENERATED_VERSION}.'
        f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
        f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
    )
|
||
|
||
class PredictorStub(object):
    """Client stub for the api.predict.Predictor prediction service."""

    def __init__(self, channel):
        """Bind the Predict RPC to a channel.

        Args:
            channel: A grpc.Channel.
        """
        predict_path = '/api.predict.Predictor/Predict'
        # Unary-unary callable: serializes PredictRequest, parses PredictResponse.
        self.Predict = channel.unary_unary(
            predict_path,
            request_serializer=predict__pb2.PredictRequest.SerializeToString,
            response_deserializer=predict__pb2.PredictResponse.FromString,
            _registered_method=True,
        )
|
||
|
||
class PredictorServicer(object):
    """Server-side base class for the prediction service.

    The Predict method defined here only reports UNIMPLEMENTED.
    """

    def Predict(self, request, context):
        """Perform model inference.

        This default marks the call as UNIMPLEMENTED on the context and
        raises NotImplementedError.
        """
        not_implemented = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(not_implemented)
        raise NotImplementedError(not_implemented)
|
||
|
||
def add_PredictorServicer_to_server(servicer, server):
    """Register a Predictor servicer's handlers on a gRPC server.

    The Predict handler is added both through a generic handler and as a
    registered method handler, under the service name api.predict.Predictor.

    Args:
        servicer: Object providing the Predict method.
        server: Server object to register the handlers on.
    """
    service_name = 'api.predict.Predictor'
    predict_handler = grpc.unary_unary_rpc_method_handler(
        servicer.Predict,
        request_deserializer=predict__pb2.PredictRequest.FromString,
        response_serializer=predict__pb2.PredictResponse.SerializeToString,
    )
    handlers_by_method = {'Predict': predict_handler}

    generic = grpc.method_handlers_generic_handler(service_name, handlers_by_method)
    server.add_generic_rpc_handlers((generic,))
    server.add_registered_method_handlers(service_name, handlers_by_method)
|
||
|
||
# This class is part of an EXPERIMENTAL API.
class Predictor(object):
    """Static, channel-less entry point for the prediction service."""

    @staticmethod
    def Predict(request,
                target,
                options=(),
                channel_credentials=None,
                call_credentials=None,
                insecure=False,
                compression=None,
                wait_for_ready=None,
                timeout=None,
                metadata=None):
        """Call Predict on `target` through grpc.experimental.unary_unary.

        Every argument is forwarded unchanged to grpc.experimental.unary_unary.
        """
        predict_path = '/api.predict.Predictor/Predict'
        serialize_request = predict__pb2.PredictRequest.SerializeToString
        parse_response = predict__pb2.PredictResponse.FromString
        # Positional order below follows the original call: `insecure` is
        # passed before `call_credentials`, unlike this method's signature.
        return grpc.experimental.unary_unary(
            request,
            target,
            predict_path,
            serialize_request,
            parse_response,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
syntax = "proto3"; | ||
package api.storage; | ||
option go_package = "../grpc_storage/go"; | ||
|
||
// DB is the result storage service. It offers one RPC to save results and
// one to read them back.
service DB {
  // SaveResult takes a SaveResultRequest and returns an empty
  // SaveResultReply.
  rpc SaveResult(SaveResultRequest) returns (SaveResultReply);

  // GetResult takes a GetResultRequest and returns a GetResultReply.
  rpc GetResult(GetResultRequest) returns (GetResultReply);
}
|
||
// KeyValue is a single string key paired with a string value. It is the
// element type of the `results` lists in this file.
message KeyValue {
  // Name of the entry.
  string key = 1;

  // Value of the entry, encoded as a string.
  string value = 2;
}
|
||
// SaveResultReply is the response of DB.SaveResult. It currently has no
// fields.
message SaveResultReply {}
|
||
// SaveResultRequest is the request of DB.SaveResult.
message SaveResultRequest {
  // Namespace of the trial (presumably a Kubernetes namespace -- confirm).
  string namespace = 1;

  // Name of the trial the results belong to.
  string trial_name = 2;

  // Field number 3 is intentionally skipped; it belongs to the disabled
  // field below. Do not assign 3 to a different field.
  // string experiment_name = 3;

  // Results to store, as key/value pairs.
  repeated KeyValue results = 4;
}
|
||
// GetResultRequest is the request of DB.GetResult.
message GetResultRequest {
  // Namespace of the trial (presumably a Kubernetes namespace -- confirm).
  string namespace = 1;

  // Name of the trial whose results are requested.
  string trial_name = 2;

  // Field number 3 belongs to the disabled field below; do not assign it
  // to a different field.
  // string experiment_name = 3;
}
|
||
// GetResultReply is the response of DB.GetResult.
message GetResultReply {
  // Namespace of the trial (presumably a Kubernetes namespace -- confirm).
  string namespace = 1;

  // Name of the trial the results belong to.
  string trial_name = 2;

  // Field number 3 is intentionally skipped; it belongs to the disabled
  // field below. Do not assign 3 to a different field.
  // string experiment_name = 3;

  // Returned results, as key/value pairs.
  repeated KeyValue results = 4;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash
#
# Regenerates the Go and Python bindings for api.proto.
# All paths are relative, so run this from the directory that contains the
# proto file.

# Stop at the first failing command (and on unset variables) instead of
# carrying on to the next generator after an error.
set -euo pipefail

export PROTO_FILE=api.proto

# Generate Go messages and gRPC stubs.
# NOTE(review): the plugins=grpc parameter is only understood by the legacy
# github.com/golang/protobuf protoc-gen-go; the current
# google.golang.org/protobuf generator rejects it in favour of
# --go-grpc_out. The predict.proto script uses plain --go_out -- confirm
# which toolchain is intended and align the two scripts.
protoc --go_out=plugins=grpc:./ "$PROTO_FILE"

# Generate Python messages and gRPC stubs.
# protoc does not create a missing output directory, so make sure it exists.
mkdir -p python3
python3 -m grpc_tools.protoc -I. --python_out=python3 --grpc_python_out=python3 "$PROTO_FILE"
47 changes: 47 additions & 0 deletions
47
api/v1alpha1/grpc_proto/grpc_storage_v2/python3/api_pb2.py
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.