✨ Add Binary Build and Publish Workflow #64

Workflow file for this run

.github/workflows/evaluation.yml at f78a98e

	# Runs kai's evaluation script against a matrix of LLM providers/models on
	# every push to main and publishes the score with github-action-benchmark.
	name: Evaluation Matrix
	on:
	  push:
	    branches:
	      - main

	# The benchmark action is given the token below and auto-pushes its data,
	# hence the write scopes.
	permissions:
	  deployments: write
	  contents: write

	jobs:
	  evaluation:
	    name: Performance Evaluation
	    runs-on: ubuntu-latest
	    strategy:
	      matrix:
	        # One job per (evaluation, test) combination.
	        evaluation:
	          - provider: ChatIBMGenAI
	            model_prefix: codellama
	            model: codellama-34b-instruct
	          - provider: ChatIBMGenAI
	            model_prefix: deepseek-ai
	            model: deepseek-coder-33b-instruct
	          - provider: ChatIBMGenAI
	            model_prefix: meta-llama
	            model: llama-3-70b-instruct
	            # Optional per-model override; only this entry sets it.
	            max_new_tokens: 2048
	          - provider: ChatIBMGenAI
	            model_prefix: mistralai
	            model: mistral-7b-v0-1
	          - provider: ChatIBMGenAI
	            model_prefix: mistralai
	            model: mixtral-8x7b-instruct-v01
	        test:
	          - example: example_a
	    steps:
	      - uses: actions/checkout@v4
	      # Pinned to a release tag instead of the moving `main` branch.
	      - uses: actions/setup-python@v5
	        with:
	          # Quoted so the version is a string, not a YAML float.
	          python-version: '3.12'
	      # Exports MAX_NEW_TOKENS to later steps, only for matrix entries that
	      # define max_new_tokens.
	      - name: Update environment
	        if: matrix.evaluation.max_new_tokens != null
	        run: |
	          echo "MAX_NEW_TOKENS=${{ matrix.evaluation.max_new_tokens }}" >> "$GITHUB_ENV"
	      # Generates kai/config.toml for the selected model, runs evaluation.py
	      # and writes a one-entry JSON benchmark result to output.txt.
	      - name: Run benchmark
	        run: |
	          pip install -r requirements.txt
	          pip install -e .
	          cd kai
	          # The heredoc below is unquoted, so the shell expands variables in
	          # it. The original used lowercase $pwd, which is unset in bash and
	          # produced log_dir = "/logs"; ${PWD} resolves to this directory.
	          # NOTE(review): if the application expects a literal "$pwd"
	          # placeholder instead, write \$pwd here - confirm.
	          cat << EOF > config.toml
	          log_level = "info"
	          file_log_level = "debug"
	          log_dir = "${PWD}/logs"
	          demo_mode = false
	          trace_enabled = true
	          solution_consumers = ["diff_only", "llm_summary"]
	          [incident_store]
	          solution_detectors = "naive"
	          solution_producers = "text_only"
	          [incident_store.args]
	          provider = "postgresql"
	          host = "127.0.0.1"
	          database = "kai"
	          user = "kai"
	          password = "dog8code"
	          [embeddings]
	          todo = true
	          [models]
	          provider = "${{ matrix.evaluation.provider }}"
	          [models.args]
	          model_id = "${{ matrix.evaluation.model_prefix }}/${{ matrix.evaluation.model }}"
	          EOF
	          # Appended right after [models.args], so the key lands in that table.
	          if [[ -n "${MAX_NEW_TOKENS:-}" ]]; then
	            cat << EOF >> config.toml
	          parameters.max_new_tokens = ${{ matrix.evaluation.max_new_tokens }}
	          EOF
	          fi
	          # "value" is the third whitespace-separated field of the last line
	          # printed by evaluation.py.
	          echo [{\"name\": \
	            \"${{ matrix.evaluation.provider }}_${{ matrix.evaluation.model_prefix }}_${{ matrix.evaluation.model }}_${{ matrix.test.example }}\", \
	            \"unit\": \"Match\", \
	            \"value\": \"$(PYTHONPATH=".." python evaluation.py --configs ./config.toml | tail -n 1 | awk '{ print $3 }')\" \
	            }] > ../output.txt
	          # Restore the tracked config.toml that was overwritten above.
	          git checkout config.toml
	          cd ..
	        env:
	          GENAI_KEY: ${{ secrets.GENAI_KEY }}
	      # Publishes output.txt into the `evaluations` benchmark data directory.
	      - name: Store benchmark result
	        uses: benchmark-action/github-action-benchmark@v1
	        with:
	          tool: customBiggerIsBetter
	          benchmark-data-dir-path: evaluations
	          output-file-path: output.txt
	          # fail-on-alert: true
	          github-token: ${{ secrets.GITHUB_TOKEN }}
	          auto-push: true

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

✨ Add Binary Build and Publish Workflow #64

Workflow file

✨ Add Binary Build and Publish Workflow #64

Jobs

Run details

Workflow file for this run