Skip to content

✨ Add Binary Build and Publish Workflow #64

✨ Add Binary Build and Publish Workflow

✨ Add Binary Build and Publish Workflow #64

Workflow file for this run

# Workflow header: runs on every push to the `main` branch.
name: Evaluation Matrix
on:
  push:
    branches:
      - main
# Scopes granted to the GITHUB_TOKEN for this workflow.
# `contents: write` lets the "Store benchmark result" step push its data
# (that step sets `auto-push: true`).
# NOTE(review): nothing visible in this workflow uses `deployments: write` -
# confirm it is still required.
permissions:
  deployments: write
  contents: write
jobs:
  # Runs kai/evaluation.py once per (model, example) combination and records
  # the resulting score with github-action-benchmark.
  evaluation:
    name: Performance Evaluation
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One entry per model under test. `max_new_tokens` is optional: when
        # present it is exported as MAX_NEW_TOKENS and appended to the
        # generated config.toml under [models.args].
        evaluation:
          - provider: ChatIBMGenAI
            model_prefix: codellama
            model: codellama-34b-instruct
          - provider: ChatIBMGenAI
            model_prefix: deepseek-ai
            model: deepseek-coder-33b-instruct
          - provider: ChatIBMGenAI
            model_prefix: meta-llama
            model: llama-3-70b-instruct
            max_new_tokens: 2048
          - provider: ChatIBMGenAI
            model_prefix: mistralai
            model: mistral-7b-v0-1
          - provider: ChatIBMGenAI
            model_prefix: mistralai
            model: mixtral-8x7b-instruct-v01
        # Only used to build the benchmark series name below.
        test:
          - example: example_a
    steps:
      - uses: actions/checkout@v4
      # Pinned to a release tag instead of the moving `main` branch.
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it a string (a bare 3.10 would become 3.1).
          python-version: "3.12"
      - name: Update environment
        # Skipped for matrix entries that do not define max_new_tokens.
        if: matrix.evaluation.max_new_tokens != null
        run: |
          echo "MAX_NEW_TOKENS=${{ matrix.evaluation.max_new_tokens }}" >> "$GITHUB_ENV"
      - name: Run benchmark
        env:
          GENAI_KEY: ${{ secrets.GENAI_KEY }}
        run: |
          # The default run shell is `bash -e` without pipefail, so a failing
          # evaluation.py inside a pipeline would otherwise go unnoticed.
          set -eo pipefail
          pip install -r requirements.txt
          pip install -e .
          cd kai
          # The heredoc delimiter is unquoted, so shell variables in the body
          # are expanded. The original body used lowercase $pwd, which is unset
          # and rendered log_dir as "/logs"; PWD yields the current directory.
          # NOTE(review): if the application expands a literal $pwd placeholder
          # itself, escape the dollar sign here instead - confirm.
          cat << EOF > config.toml
          log_level = "info"
          file_log_level = "debug"
          log_dir = "${PWD}/logs"
          demo_mode = false
          trace_enabled = true
          solution_consumers = ["diff_only", "llm_summary"]
          [incident_store]
          solution_detectors = "naive"
          solution_producers = "text_only"
          [incident_store.args]
          provider = "postgresql"
          host = "127.0.0.1"
          database = "kai"
          user = "kai"
          password = "dog8code"
          [embeddings]
          todo = true
          [models]
          provider = "${{ matrix.evaluation.provider }}"
          [models.args]
          model_id = "${{ matrix.evaluation.model_prefix }}/${{ matrix.evaluation.model }}"
          EOF
          # Appended directly after [models.args], so the dotted key lands in
          # that table.
          if [[ -n "${MAX_NEW_TOKENS}" ]]; then
            echo "parameters.max_new_tokens = ${MAX_NEW_TOKENS}" >> config.toml
          fi
          # The score is the third whitespace-separated field of the last
          # line printed by evaluation.py.
          score="$(PYTHONPATH=".." python evaluation.py --configs ./config.toml | tail -n 1 | awk '{ print $3 }')"
          if [[ -z "${score}" ]]; then
            echo "evaluation.py did not print a score" >&2
            exit 1
          fi
          # NOTE(review): the value is emitted as a JSON string, as before; the
          # benchmark action documents a number here - confirm before changing.
          printf '[{"name": "%s", "unit": "Match", "value": "%s"}]\n' \
            "${{ matrix.evaluation.provider }}_${{ matrix.evaluation.model_prefix }}_${{ matrix.evaluation.model }}_${{ matrix.test.example }}" \
            "${score}" > ../output.txt
          # Restore the tracked config.toml so the working tree is clean for
          # the next step.
          git checkout config.toml
          cd ..
      - name: Store benchmark result
        uses: benchmark-action/github-action-benchmark@v1
        with:
          tool: customBiggerIsBetter
          benchmark-data-dir-path: evaluations
          output-file-path: output.txt
          # fail-on-alert: true
          github-token: ${{ secrets.GITHUB_TOKEN }}
          auto-push: true