misc: use pip cache purge and add unit test ci (#871)

sgl-project · Aug 1, 2024 · 7f6c690
1 parent 40e6f51
commit 7f6c690
Show file tree

Hide file tree

Showing 3 changed files with 50 additions and 10 deletions.
diff --git a/.github/workflows/pr-e2e-test.yml b/.github/workflows/pr-e2e-test.yml
@@ -16,7 +16,7 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  gpu-job:
+  pr-e2e-test:
     runs-on: self-hosted
     env:
       CUDA_VISIBLE_DEVICES: 6
@@ -27,20 +27,17 @@ jobs:
 
     - name: Install dependencies
       run: |
+        cd /data/zhyncs/venv && source ./bin/activate && cd -
+        pip cache purge
         pip install --upgrade pip
         pip install -e "python[all]"
         pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
         pip install --upgrade transformers
 
-    - name: Test OpenAI Backend
-      run: |
-        export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
-        cd test/lang
-        python3 test_openai_backend.py
-
     - name: Benchmark Serving
       run: |
-        python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
+        cd /data/zhyncs/venv && source ./bin/activate && cd -
+        python3 -m sglang.launch_server --model /data/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
 
         echo "Waiting for server to start..."
         for i in {1..120}; do
@@ -55,7 +52,7 @@ jobs:
           sleep 1
         done
 
-        cd /home/lmzheng/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512
+        cd /data/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512
 
         echo "Stopping server..."
         kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}')
diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml
@@ -0,0 +1,52 @@
+# Runs the OpenAI-backend language test on the self-hosted runner when files
+# under python/sglang/ change on main (push or pull request), or on manual dispatch.
+name: Unit Test
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "python/sglang/**"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "python/sglang/**"
+  workflow_dispatch:
+
+# A newer run for the same ref cancels the one still in progress.
+concurrency:
+  group: unit-test-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  unit-test:
+    runs-on: self-hosted
+    env:
+      # Quoted so the value stays a string rather than a YAML integer.
+      CUDA_VISIBLE_DEVICES: "6"
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+
+    - name: Install dependencies
+      run: |
+        # Activate the virtualenv at /data/zhyncs/venv (expected to already exist on the runner), then return to the checkout.
+        cd /data/zhyncs/venv && source ./bin/activate && cd -
+        pip cache purge
+        pip install --upgrade pip
+        pip install -e "python[all]"
+        pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
+        pip install --upgrade transformers
+
+    - name: Test OpenAI Backend
+      env:
+        # Hand the secret to the step through its environment instead of
+        # expanding it into the script text, where an unquoted value could be
+        # word-split or interpreted by the shell.
+        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      run: |
+        # Each step runs in its own shell, so the virtualenv is activated again.
+        cd /data/zhyncs/venv && source ./bin/activate && cd -
+        cd test/lang
+        python3 test_openai_backend.py
diff --git a/python/sglang/README.md b/python/sglang/README.md
@@ -1,4 +1,5 @@
-# Code Structure
+# Code Structures
+
 
 - `lang`: The frontend language.
 - `srt`: The backend engine for running local models. (SRT = SGLang Runtime).