Skip to content

Commit

Permalink
address comments
Browse files (browse the repository at this point in the history)
Signed-off-by: Kai-Hsun Chen <kaihsun@anyscale.com>
  • Loading branch information
kevin85421 committed Sep 4, 2024
1 parent 3d9d612 commit 3ecf574
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/nightly-eval.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Run eval
timeout-minutes: 20
run: |
python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --disable-radix-cache &
python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --disable-radix-cache &
echo "Waiting for server to start..."
for i in {1..120}; do
Expand Down
2 changes: 1 addition & 1 deletion benchmark/blog_v0_2/405b_sglang.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# wget https://huggingface.co/neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8/resolve/main/tokenizer_config.json

# Launch sglang
# python -m sglang.launch_server --model ~/llama-3.1-405b-fp8-dummy/ --load-format dummy --tp 8 --quant fp8 --disable-radix --mem-frac 0.87
# python -m sglang.launch_server --model-path ~/llama-3.1-405b-fp8-dummy/ --load-format dummy --tp 8 --quant fp8 --disable-radix --mem-frac 0.87

# offline
python3 -m sglang.bench_serving --backend sglang --dataset-name random --num-prompt 3000 --random-input 1024 --random-output 1024 > sglang_log11
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/launch_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

if __name__ == "__main__":
server_args = prepare_server_args(sys.argv[1:])
model_override_args = server_args.model_override_args
model_override_args = server_args.json_model_override_args

try:
launch_server(server_args, model_override_args=model_override_args)
Expand Down
12 changes: 7 additions & 5 deletions python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ class ServerArgs:
nnodes: int = 1
node_rank: Optional[int] = None

# Model override args
model_override_args: Optional[dict] = None
# Model override args in JSON
json_model_override_args: Optional[dict] = None

def __post_init__(self):
if self.tokenizer_path is None:
Expand Down Expand Up @@ -461,7 +461,7 @@ def add_cli_args(parser: argparse.ArgumentParser):

# Model override args
parser.add_argument(
"--model-override-args",
"--json-model-override-args",
type=str,
help="A dictionary in JSON string format used to override default model configurations.",
)
Expand All @@ -470,8 +470,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
def from_cli_args(cls, args: argparse.Namespace):
args.tp_size = args.tensor_parallel_size
args.dp_size = args.data_parallel_size
args.model_override_args = (
json.loads(args.model_override_args) if args.model_override_args else None
args.json_model_override_args = (
json.loads(args.json_model_override_args)
if args.json_model_override_args
else None
)
attrs = [attr.name for attr in dataclasses.fields(cls)]
return cls(**{attr: getattr(args, attr) for attr in attrs})
Expand Down
1 change: 1 addition & 0 deletions test/srt/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"test_triton_attn_backend.py",
"test_update_weights.py",
"test_vision_openai_server.py",
"test_server_args.py",
],
"sampling/penaltylib": glob.glob(
"sampling/penaltylib/**/test_*.py", recursive=True
Expand Down
4 changes: 2 additions & 2 deletions test/srt/test_server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ def test_prepare_server_args(self):
[
"--model-path",
"model_path",
"--model-override-args",
"--json-model-override-args",
'{"rope_scaling": {"factor": 2.0, "type": "linear"}}',
]
)
self.assertEqual(server_args.model_path, "model_path")
self.assertEqual(
server_args.model_override_args,
server_args.json_model_override_args,
{"rope_scaling": {"factor": 2.0, "type": "linear"}},
)

Expand Down

0 comments on commit 3ecf574

Please sign in to comment.