Strange behavior with attn_implementation="eager" #141

Workflow file for this run

.github/workflows/self-comment-ci.yml at 5a2aedc

	name: PR comment GitHub CI

	# Triggered by newly created comments on issues / pull requests. Whether a given
	# comment actually starts the slow CI is decided by the `if:` of the first job.
	on:
	  issue_comment:
	    types:
	      - created
	    # NOTE(review): branch filters are documented for push / pull_request events;
	    # confirm that this filter has any effect for `issue_comment`.
	    branches-ignore:
	      - main

	# One concurrency group per (workflow, issue number, "is this a run-slow comment").
	# With `cancel-in-progress`, a new run-slow comment on the same PR cancels the
	# previous slow run. Comments that do not start with a run-slow trigger evaluate
	# the last component to `false`, land in a different group, and therefore do not
	# cancel a slow run that is in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow') }}
	  cancel-in-progress: true

	jobs:
	  # Gate job: decides whether the comment may trigger the slow CI and exposes the
	  # pull request number (empty when the comment was not made on a pull request).
	  get-pr-number:
	    runs-on: ubuntu-22.04
	    name: Get PR number
	    # For security: only allow team members to run.
	    # Runs only when the issue/PR is open, the commenting actor is in the allowlist
	    # below, and the comment starts with one of the run-slow triggers.
	    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
	    outputs:
	      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
	    steps:
	      - name: Get PR number
	        shell: bash
	        # PR_NUMBER stays empty unless the event carries both an issue number and
	        # an `issue.pull_request` entry; downstream jobs skip on an empty value.
	        run: |
	          if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
	            echo "PR_NUMBER=${{ github.event.issue.number }}" >> "$GITHUB_ENV"
	          else
	            echo "PR_NUMBER=" >> "$GITHUB_ENV"
	          fi

	      - name: Check PR number
	        shell: bash
	        run: |
	          echo "${{ env.PR_NUMBER }}"

	      # Publish the value as a step output so it can be exposed as a job output.
	      - name: Set PR number
	        id: set_pr_number
	        run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"

	  # Resolves the SHA of the PR head commit. The commit statuses created and
	  # updated by `create_run` / `update_run_status` are attached to this SHA.
	  get-sha:
	    runs-on: ubuntu-22.04
	    needs: get-pr-number
	    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }}
	    outputs:
	      PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }}
	    steps:
	      - uses: actions/checkout@v4
	        with:
	          fetch-depth: "0"
	          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"

	      - name: Get SHA
	        id: get_sha
	        env:
	          PR_NUMBER: ${{needs.get-pr-number.outputs.PR_NUMBER}}
	        # The checkout above is the merge ref; fetch and check out the PR head ref
	        # so that `git log -1` reports the head commit rather than the merge commit.
	        run: |
	          git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head
	          git checkout refs/remotes/pull/$PR_NUMBER/head
	          echo "PR_HEAD_SHA: $(git log -1 --format=%H)"
	          echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"

	  # use a python script to handle this complex logic
	  # case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model)
	  # case 2: `run-slow model_1, model_2`
	  get-tests:
	    runs-on: ubuntu-22.04
	    needs: get-pr-number
	    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }}
	    # Least privilege: this job runs a script taken from the PR merge ref, so the
	    # token is limited to reading the repository and posting the reply comment.
	    permissions:
	      contents: read
	      issues: write
	      pull-requests: write
	    outputs:
	      models: ${{ steps.models_to_run.outputs.models }}
	    steps:
	      - uses: actions/checkout@v4
	        with:
	          fetch-depth: "0"
	          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"

	      - name: Get models to test
	        env:
	          # Passed through the environment (not interpolated into the script) so
	          # the comment text is never parsed as shell code.
	          PR_COMMENT: ${{ github.event.comment.body }}
	        # The last line printed by the script is stored as `models`.
	        run: |
	          python -m pip install GitPython
	          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
	          echo "models=$(tail -n 1 output.txt)" >> "$GITHUB_ENV"

	      - name: Show models to test
	        id: models_to_run
	        # `models` is read as a shell variable (it was exported via GITHUB_ENV by
	        # the previous step) instead of being spliced into the script text.
	        run: |
	          echo "$models"
	          echo "models=$models" >> "$GITHUB_OUTPUT"

	      - name: Reply to the comment
	        if: ${{ env.models != '[]' }}
	        env:
	          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        run: |
	          gh api \
	            --method POST \
	            -H "Accept: application/vnd.github+json" \
	            -H "X-GitHub-Api-Version: 2022-11-28" \
	            repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
	            -f "body=This comment contains run-slow, running the specified jobs: $models ..."

	  # Marks the PR head commit as `pending` for the `pytest/custom-tests` context.
	  create_run:
	    name: Create run
	    needs: [get-sha, get-tests]
	    if: ${{ needs.get-tests.outputs.models != '[]' }}
	    # Only the commit status API is called here.
	    permissions:
	      statuses: write
	    runs-on: ubuntu-22.04
	    steps:
	      - name: Create Run
	        id: create_run
	        env:
	          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	          # Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
	          # See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
	          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
	        run: |
	          gh api \
	            --method POST \
	            -H "Accept: application/vnd.github+json" \
	            -H "X-GitHub-Api-Version: 2022-11-28" \
	            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
	            -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"

	  # Runs the selected test folders on GPU runners: one matrix entry per
	  # (test folder, runner group) pair.
	  run_models_gpu:
	    name: Run all tests for the model
	    if: ${{ needs.get-tests.outputs.models != '[]' }}
	    needs: [get-pr-number, get-tests, create_run]
	    strategy:
	      fail-fast: false
	      matrix:
	        folders: ${{ fromJson(needs.get-tests.outputs.models) }}
	        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
	    runs-on:
	      group: '${{ matrix.machine_type }}'
	    container:
	      image: huggingface/transformers-all-latest-gpu
	      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
	    steps:
	      - name: Echo input and matrix info
	        shell: bash
	        run: |
	          echo "${{ matrix.folders }}"

	      - name: Echo folder ${{ matrix.folders }}
	        shell: bash
	        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
	        # set the artifact folder names (because the character `/` is not allowed).
	        run: |
	          echo "${{ matrix.folders }}"
	          matrix_folders="${{ matrix.folders }}"
	          matrix_folders=${matrix_folders/'models/'/'models_'}
	          echo "$matrix_folders"
	          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

	      # Switch the repository at /transformers to the PR merge commit.
	      - name: Checkout to PR merge commit
	        working-directory: /transformers
	        run: |
	          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
	          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
	          git log -1 --format=%H

	      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
	        working-directory: /transformers
	        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

	      - name: NVIDIA-SMI
	        run: |
	          nvidia-smi

	      # Map the runner group name to the short label (`single-gpu` / `multi-gpu`)
	      # used in report and artifact names; unknown groups keep their own name.
	      - name: Set `machine_type` for report and artifact names
	        working-directory: /transformers
	        shell: bash
	        run: |
	          echo "${{ matrix.machine_type }}"
	          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
	            machine_type=single-gpu
	          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
	            machine_type=multi-gpu
	          else
	            machine_type=${{ matrix.machine_type }}
	          fi
	          echo "$machine_type"
	          echo "machine_type=$machine_type" >> "$GITHUB_ENV"

	      - name: Environment
	        working-directory: /transformers
	        run: |
	          python3 utils/print_env.py

	      - name: Show installed libraries and their versions
	        working-directory: /transformers
	        run: pip freeze

	      - name: Run all tests on GPU
	        working-directory: /transformers
	        run: |
	          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
	          echo $CUDA_VISIBLE_DEVICES
	          python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

	      - name: Failure short reports
	        if: ${{ failure() }}
	        continue-on-error: true
	        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

	      # Runs even after a failed step: the upload step below uses `always()`, so
	      # the directory it uploads must exist in that case too.
	      - name: Make sure report directory exists
	        if: ${{ always() }}
	        shell: bash
	        run: |
	          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
	          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
	          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"

	      # The artifact name uses `env.matrix_folders` (no `/`), while the path uses
	      # `matrix.folders`, matching the directory written by the steps above.
	      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
	        if: ${{ always() }}
	        uses: actions/upload-artifact@v4
	        with:
	          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
	          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

	  # Replaces the `pending` commit status created by `create_run` with the final
	  # result of `run_models_gpu`. Runs whenever `create_run` succeeded, regardless
	  # of how the test job ended.
	  update_run_status:
	    name: Update Check Run Status
	    needs: [get-sha, create_run, run_models_gpu]
	    # Only the commit status API is called here.
	    permissions:
	      statuses: write
	    if: ${{ always() && needs.create_run.result == 'success' }}
	    runs-on: ubuntu-22.04
	    env:
	      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	      GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
	    steps:
	      # Translate the job result into a commit status state: `cancelled` is
	      # reported as `failure`, `skipped` as `success`, anything else is passed
	      # through unchanged.
	      - name: Get `run_models_gpu` job status
	        run: |
	          echo "${{ needs.run_models_gpu.result }}"
	          if [ "${{ needs.run_models_gpu.result }}" = "cancelled" ]; then
	            echo "STATUS=failure" >> "$GITHUB_ENV"
	          elif [ "${{ needs.run_models_gpu.result }}" = "skipped" ]; then
	            echo "STATUS=success" >> "$GITHUB_ENV"
	          else
	            echo "STATUS=${{ needs.run_models_gpu.result }}" >> "$GITHUB_ENV"
	          fi

	      - name: Update PR commit statuses
	        run: |
	          echo "${{ needs.run_models_gpu.result }}"
	          echo "${{ env.STATUS }}"
	          gh api \
	            --method POST \
	            -H "Accept: application/vnd.github+json" \
	            -H "X-GitHub-Api-Version: 2022-11-28" \
	            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
	            -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Strange behavior with attn_implementation="eager" #141

Workflow file

Strange behavior with attn_implementation="eager" #141

Jobs

Run details

Workflow file for this run