From 64bdde57e0f8a84ab1f977e27d82bbd0932356f1 Mon Sep 17 00:00:00 2001
From: Andy Linfoot <78757007+andy-neuma@users.noreply.github.com>
Date: Wed, 21 Feb 2024 12:50:46 -0500
Subject: [PATCH] initial GHA workflows for "build test" and "remote push"
 (#27)

SUMMARY:
* initial set of "actions with a little a" that are the building blocks
for eventual CI system
* "build test" workflow
* "remote push" workflow on `a10g`
* update some requirement files to have packages listed in alphabetical
order

NOTE: this PR is still somewhat nebulous as i'm still working through
building and testing "neuralmagic-vllm" in our automation environment.

TEST:
currently, i'm working through various workflow components, i.e.
"actions with a little a". the bits making up the actions in this PR
have been constructed from my notes along the way.

we can do a "complete" run that includes: linting, building, installing,
and running tests.

GHA link ...
https://github.com/neuralmagic/neuralmagic-vllm/actions/runs/7975058564
`testmo` ... https://neuralmagic.testmo.net/automation/runs/view/8097

Latest GHA link ...
https://github.com/neuralmagic/neuralmagic-vllm/actions/runs/7992489982

---------

Co-authored-by: andy-neuma <andy@neuralmagic.com>
---
 .github/actions/nm-build-vllm/action.yml      | 33 +++++++
 .github/actions/nm-lint-python/action.yml     | 23 +++++
 .github/actions/nm-mypy/action.yml            | 16 ++++
 .github/actions/nm-run-summary/action.yml     | 48 ++++++++++
 .github/actions/nm-set-env/action.yml         | 21 +++++
 .github/actions/nm-set-python/action.yml      | 30 ++++++
 .github/actions/nm-test-vllm/action.yml       | 36 ++++++++
 .../actions/nm-testmo-run-complete/action.yml | 35 +++++++
 .../actions/nm-testmo-run-create/action.yml   | 59 ++++++++++++
 .../nm-testmo-run-submit-thread/action.yml    | 59 ++++++++++++
 .github/scripts/determine-threading           | 49 ++++++++++
 .github/scripts/step-status                   | 14 +++
 .github/workflows/build-test.yml              | 92 ++++++++++++++++++-
 .github/workflows/remote-push.yml             | 30 ++++++
 .gitignore                                    |  4 +
 requirements-dev.txt                          | 14 +--
 requirements-neuron.txt                       | 12 +--
 requirements-rocm.txt                         | 12 +--
 requirements.txt                              | 11 ++-
 tests/models/test_mistral.py                  |  1 +
 tests/models/test_models.py                   |  1 +
 21 files changed, 574 insertions(+), 26 deletions(-)
 create mode 100644 .github/actions/nm-build-vllm/action.yml
 create mode 100644 .github/actions/nm-lint-python/action.yml
 create mode 100644 .github/actions/nm-mypy/action.yml
 create mode 100644 .github/actions/nm-run-summary/action.yml
 create mode 100644 .github/actions/nm-set-env/action.yml
 create mode 100644 .github/actions/nm-set-python/action.yml
 create mode 100644 .github/actions/nm-test-vllm/action.yml
 create mode 100644 .github/actions/nm-testmo-run-complete/action.yml
 create mode 100644 .github/actions/nm-testmo-run-create/action.yml
 create mode 100644 .github/actions/nm-testmo-run-submit-thread/action.yml
 create mode 100755 .github/scripts/determine-threading
 create mode 100755 .github/scripts/step-status
 create mode 100644 .github/workflows/remote-push.yml

diff --git a/.github/actions/nm-build-vllm/action.yml b/.github/actions/nm-build-vllm/action.yml
new file mode 100644
index 0000000000000..780c2f99de3c6
--- /dev/null
+++ b/.github/actions/nm-build-vllm/action.yml
@@ -0,0 +1,33 @@
+name: build neuralmagic-vllm
+description: 'build neuralmagic-vllm'
+inputs:
+  Gi_per_thread:
+    description: 'requested GiB to reserve per thread'
+    required: true
+  python:
+    description: 'python version, e.g. 3.10.12'
+    required: true
+  venv:
+    description: 'name for python virtual environment'
+    required: true
+outputs:
+  status:
+    description: "final build status from 'pip install -e'"
+    value: ${{ steps.build.outputs.status }}
+runs:
+  using: composite
+  steps:
+  - id: build  # NOTE: the 'Gi_per_thread' input is declared above but not referenced by this step
+    run: |
+      # TODO: this is a hack ... fix it later
+      # pyenv hardcoded ... python version hardcoded ...
+      COMMIT=${{ github.sha }}
+      VENV="${{ inputs.venv }}-${COMMIT:0:7}"  # "<venv input>-<first 7 chars of the commit sha>"
+      source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      pip3 install --index-url http://192.168.201.226:8080/ --trusted-host 192.168.201.226 magic-wand
+      pip3 install -r requirements.txt
+      SUCCESS=0
+      pip3 install -e . || SUCCESS=$?  # record the exit code of the editable install (stays 0 on success)
+      echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"  # published as this action's 'status' output
+      exit ${SUCCESS}  # the step exits with the editable install's exit code
+    shell: bash
diff --git a/.github/actions/nm-lint-python/action.yml b/.github/actions/nm-lint-python/action.yml
new file mode 100644
index 0000000000000..bcc27532dfb98
--- /dev/null
+++ b/.github/actions/nm-lint-python/action.yml
@@ -0,0 +1,23 @@
+name: lint python
+description: "runs 'ruff' and reports errors"
+outputs:
+  status:
+    description: "return code from 'ruff'"
+    value: ${{ steps.ruff.outputs.status }}
+runs:
+  using: composite
+  steps:
+    - id: ruff
+      run: |
+        SUCCESS=0
+        PYTHON_FILES=$(ruff .) || SUCCESS=$?  # capture ruff's report text and record its exit code
+        if [ ${SUCCESS} -ne 0 ]; then  # the job summary is only written when ruff failed
+          echo "__Python Lint Failures:__" >> $GITHUB_STEP_SUMMARY
+          echo "${PYTHON_FILES}" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo -e "lint: \xE2\x9D\x8C __FAILED__" >> $GITHUB_STEP_SUMMARY  # \xE2\x9D\x8C = UTF-8 bytes of the red x emoji
+          echo "" >> $GITHUB_STEP_SUMMARY
+        fi
+        echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"  # published as this action's 'status' output
+        exit ${SUCCESS}  # the step exits with ruff's exit code
+      shell: bash
diff --git a/.github/actions/nm-mypy/action.yml b/.github/actions/nm-mypy/action.yml
new file mode 100644
index 0000000000000..ec19d7f1fb3f5
--- /dev/null
+++ b/.github/actions/nm-mypy/action.yml
@@ -0,0 +1,16 @@
+name: mypy
+description: "run 'mypy' and report final status"
+outputs:
+  status:
+    description: "final status from 'mypy'"
+    value: ${{ steps.mypy.outputs.status }}
+runs:
+  using: composite
+  steps:
+  - id: mypy
+    run: |
+      SUCCESS=0
+      mypy || SUCCESS=$?  # record mypy's exit code (stays 0 when mypy passes)
+      echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"  # published as this action's 'status' output
+      exit ${SUCCESS}  # the step exits with mypy's exit code
+    shell: bash
diff --git a/.github/actions/nm-run-summary/action.yml b/.github/actions/nm-run-summary/action.yml
new file mode 100644
index 0000000000000..d3234beb748e6
--- /dev/null
+++ b/.github/actions/nm-run-summary/action.yml
@@ -0,0 +1,48 @@
+name: summary
+description: 'creates a neuralmagic GHA run summary'
+inputs:
+  label:
+    description: 'GHA runner label'
+    required: true
+  gitref:
+    description: 'git commit hash or branch name'
+    required: true
+  testmo_run_url:
+    description: 'testmo URL for this particular run'
+    required: true
+  python:
+    description: 'python version info'
+    required: true
+  lint_status:
+    description: 'status from python lint step'
+    required: true
+  build_status:
+    description: 'status from build step'
+    required: true
+  test_status:
+    description: 'status from test step'
+    required: true
+runs:
+  using: composite
+  steps:
+  - run: |
+      LINT_STATUS=${{ inputs.lint_status }}
+      LINT_EMOJI=$(./.github/scripts/step-status ${LINT_STATUS})  # step-status prints a check for 0, a red x otherwise
+      BUILD_STATUS=${{ inputs.build_status }}
+      BUILD_EMOJI=$(./.github/scripts/step-status ${BUILD_STATUS})
+      TEST_STATUS=${{ inputs.test_status }}
+      TEST_EMOJI=$(./.github/scripts/step-status ${TEST_STATUS})
+      echo "testmo URL: ${{ inputs.testmo_run_url }}" >> $GITHUB_STEP_SUMMARY
+      echo "" >> $GITHUB_STEP_SUMMARY  # blank line in the summary, separating the URL from the markdown table
+      echo "| Parameter | |" >> $GITHUB_STEP_SUMMARY
+      echo "|---|---|" >> $GITHUB_STEP_SUMMARY
+      echo "| label: | \`${{ inputs.label }}\` |" >> $GITHUB_STEP_SUMMARY
+      echo "| git sha: | \`${{ github.sha }}\` |" >> $GITHUB_STEP_SUMMARY
+      echo "| github actor: | '${{ github.actor }}' |" >> $GITHUB_STEP_SUMMARY
+      echo "| gitref: | '${{ inputs.gitref }}' |" >> $GITHUB_STEP_SUMMARY
+      echo "| branch name: | '${{ github.ref_name }}' |" >> $GITHUB_STEP_SUMMARY
+      echo "| python: | ${{ inputs.python }} |" >> $GITHUB_STEP_SUMMARY
+      echo "| lint: | ${LINT_EMOJI} |" >> $GITHUB_STEP_SUMMARY
+      echo "| build: | ${BUILD_EMOJI} |" >> $GITHUB_STEP_SUMMARY
+      echo "| test: | ${TEST_EMOJI} |" >> $GITHUB_STEP_SUMMARY
+    shell: bash
diff --git a/.github/actions/nm-set-env/action.yml b/.github/actions/nm-set-env/action.yml
new file mode 100644
index 0000000000000..d5b108d97ba4a
--- /dev/null
+++ b/.github/actions/nm-set-env/action.yml
@@ -0,0 +1,21 @@
+name: set neuralmagic env
+description: 'sets environment variables for neuralmagic'
+inputs:
+  hf_home:
+    description: 'Hugging Face home'
+    required: true
+runs:
+  using: composite
+  steps:
+  - run: |
+      echo "HF_HOME=${HF_HOME_TOKEN}" >> $GITHUB_ENV  # values appended to GITHUB_ENV are exported to later steps
+      echo "TORCH_CUDA_ARCH_LIST=8.0+PTX" >> $GITHUB_ENV
+      echo "PYENV_ROOT=/usr/local/apps/pyenv" >> $GITHUB_ENV
+      echo "XDG_CONFIG_HOME=/usr/local/apps" >> $GITHUB_ENV
+      WHOAMI=$(whoami)
+      echo "PATH=/usr/local/apps/pyenv/plugins/pyenv-virtualenv/shims:/usr/local/apps/pyenv/shims:/usr/local/apps/pyenv/bin:/usr/local/apps/nvm/versions/node/v16.20.2/bin:/usr/local/cuda-12.1/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/${WHOAMI}/.local/bin" >> $GITHUB_ENV  # no trailing ':' (an empty entry means the current directory)
+      echo "LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64" >> $GITHUB_ENV  # single entry; empty entries would add the current directory
+      echo "PROJECT_ID=12" >> $GITHUB_ENV  # read as PROJECT_ID by the testmo "create run" action
+    env:
+      HF_HOME_TOKEN: ${{ inputs.hf_home }}
+    shell: bash
diff --git a/.github/actions/nm-set-python/action.yml b/.github/actions/nm-set-python/action.yml
new file mode 100644
index 0000000000000..7b37add439e35
--- /dev/null
+++ b/.github/actions/nm-set-python/action.yml
@@ -0,0 +1,30 @@
+name: set python
+description: 'sets python version and creates venv for neuralmagic'
+inputs:
+  python:
+    description: 'python version, e.g. 3.10.12'
+    required: true
+  venv:
+    description: 'name for python virtual environment'
+    required: true
+outputs:
+  version:
+    description: "result from 'python --version'"
+    value: ${{ steps.set_python.outputs.version }}
+runs:
+  using: composite
+  steps:
+    - id: set_python
+      run: |
+        command -v pyenv  # show which pyenv is found on PATH
+        pyenv root
+        pyenv versions
+        pyenv local ${{ inputs.python }}  # select the requested python version for this directory
+        COMMIT=${{ github.sha }}
+        VENV="${{ inputs.venv }}-${COMMIT:0:7}"  # "<venv input>-<first 7 chars of the commit sha>"
+        pyenv virtualenv ${VENV}  # create the per-commit virtualenv
+        source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+        pyenv versions
+        VERSION=$(python --version)  # version string reported from inside the activated venv
+        echo "version=${VERSION}" >> "$GITHUB_OUTPUT"  # published as this action's 'version' output
+      shell: bash
diff --git a/.github/actions/nm-test-vllm/action.yml b/.github/actions/nm-test-vllm/action.yml
new file mode 100644
index 0000000000000..27dae15df0332
--- /dev/null
+++ b/.github/actions/nm-test-vllm/action.yml
@@ -0,0 +1,36 @@
+name: test neuralmagic-vllm
+description: "test neuralmagic-vllm via, 'pytest tests/'"
+inputs:
+  test_directory:
+    description: 'test directory, path is relative to neuralmagic-vllm'
+    required: true
+  test_xml:
+    description: 'filename for xml test results'
+    required: true
+  python:
+    description: 'python version, e.g. 3.10.12'
+    required: true
+  venv:
+    description: 'name for python virtual environment'
+    required: true
+outputs:
+  status:
+    description: "final status from 'pytest tests/'"
+    value: ${{ steps.test.outputs.status }}
+runs:
+  using: composite
+  steps:
+  - id: test
+    run: |
+      SUCCESS=0
+      # TODO: this is a hack ... fix it later
+      # pyenv hardcoded ... python version hardcoded ...
+      COMMIT=${{ github.sha }}
+      VENV="${{ inputs.venv }}-${COMMIT:0:7}"  # "<venv input>-<first 7 chars of the commit sha>"
+      source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
+      pip3 install --index-url http://192.168.201.226:8080/ --trusted-host 192.168.201.226 magic-wand
+      pip3 install -r requirements-dev.txt
+      pytest --junitxml=${{ inputs.test_xml }} ${{ inputs.test_directory }} || SUCCESS=$?  # record pytest's exit code (stays 0 on success)
+      echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"  # published as this action's 'status' output
+      exit ${SUCCESS}  # the step exits with pytest's exit code
+    shell: bash
diff --git a/.github/actions/nm-testmo-run-complete/action.yml b/.github/actions/nm-testmo-run-complete/action.yml
new file mode 100644
index 0000000000000..0f89cd8800211
--- /dev/null
+++ b/.github/actions/nm-testmo-run-complete/action.yml
@@ -0,0 +1,35 @@
+name: complete testmo run
+description: 'complete neuralmagic testmo run'
+inputs:
+  testmo_url:
+    description: 'testmo URL'
+    required: true
+  testmo_token:
+    description: 'testmo token'
+    required: true
+  testmo_run_id:
+    description: 'testmo run id'
+    required: true
+runs:
+  using: "composite"
+  steps:
+    - run: |
+        echo "completing TESTMO run ..."
+        ## CHECK testmo_url and token
+        if [[ -z "${TESTMO_URL}" ]]; then  # fail early when the URL input is empty
+          echo "The TESTMO_URL secret is not defined for this repository"
+          exit 1
+        fi
+        if [[ -z "${TESTMO_TOKEN}" ]]; then  # fail early when the token input is empty
+          echo "The TESTMO_TOKEN secret is not defined for this repository"
+          exit 1
+        fi
+        ## complete testmo run
+        npx testmo automation:run:complete \
+          --instance "${TESTMO_URL}" \
+          --run-id "${TESTMO_RUN_ID}"
+      env:
+        TESTMO_URL: ${{ inputs.testmo_url }}
+        TESTMO_TOKEN: ${{ inputs.testmo_token }}  # not passed as a flag; presumably read from the environment by the testmo CLI
+        TESTMO_RUN_ID: ${{ inputs.testmo_run_id }}
+      shell: bash
diff --git a/.github/actions/nm-testmo-run-create/action.yml b/.github/actions/nm-testmo-run-create/action.yml
new file mode 100644
index 0000000000000..9066a8c2f7dad
--- /dev/null
+++ b/.github/actions/nm-testmo-run-create/action.yml
@@ -0,0 +1,59 @@
+name: create testmo run
+description: 'create neuralmagic testmo run and return its ID'
+inputs:
+  testmo_url:
+    description: 'testmo URL'
+    required: true
+  testmo_token:
+    description: 'testmo token'
+    required: true
+  source:
+    description: "source for testmo, e.g. 'build-test'"
+    required: true
+outputs:
+  id:
+    description: 'testmo run id'
+    value: ${{ steps.testmo_id.outputs.id }}
+runs:
+  using: "composite"
+  steps:
+    - name: create run
+      id: testmo_id
+      run: |
+        echo "creating TESTMO run ..."
+        sudo mkdir -p ${HOME}/.npm
+        sudo chown -R $(whoami):$(whoami) ${HOME}/.npm  # make the npm directory owned by the current user
+        ## adjust resources and GHA link
+        npx testmo automation:resources:add-field --name git --type string --value ${GITHUB_SHA:0:7} --resources resources.json
+        RUN_URL="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"  # URL of this GHA run
+        ACTOR=${GITHUB_ACTOR}
+        BUILD=${ACTOR}-$(whoami)-gpu  # link name: "<actor>-<runner user>-gpu"
+        echo "name: ${BUILD}"
+        echo "url: ${RUN_URL}"
+        npx testmo automation:resources:add-link --name ${BUILD} --url ${RUN_URL} --resources resources.json
+        ## CHECK testmo_url and token
+        if [[ -z "${TESTMO_URL}" ]]; then
+          echo "The TESTMO_URL secret is not defined for this repository"
+          exit 1
+        fi
+        if [[ -z "${TESTMO_TOKEN}" ]]; then
+          echo "The TESTMO_TOKEN secret is not defined for this repository"
+          exit 1
+        fi
+        ## construct name
+        BRANCH_NAME=${GITHUB_REF_NAME}
+        TMP=${ACTOR}-${BRANCH_NAME}
+        TESTMO_RUN_NAME=$(echo ${TMP} | awk '{print tolower($0)}')  # lowercased "<actor>-<branch>"
+        echo "test run name: ${TESTMO_RUN_NAME}"
+        ## create testmo run (PROJECT_ID is not set in this action; it must already be in the environment)
+        TESTMO_ID=$(npx testmo automation:run:create \
+          --instance "${TESTMO_URL}" \
+          --project-id "${PROJECT_ID}" \
+          --name "${TESTMO_RUN_NAME}" \
+          --source "${{ inputs.source }}" \
+          --resources resources.json)
+        echo "id=${TESTMO_ID}" >> "${GITHUB_OUTPUT}"  # published as this action's 'id' output
+      env:
+        TESTMO_URL: ${{ inputs.testmo_url }}
+        TESTMO_TOKEN: ${{ inputs.testmo_token }}  # not passed as a flag; presumably read from the environment by the testmo CLI
+      shell: bash
diff --git a/.github/actions/nm-testmo-run-submit-thread/action.yml b/.github/actions/nm-testmo-run-submit-thread/action.yml
new file mode 100644
index 0000000000000..b47c882e591d0
--- /dev/null
+++ b/.github/actions/nm-testmo-run-submit-thread/action.yml
@@ -0,0 +1,59 @@
+name: submit results to testmo run
+description: 'asynchronously submit step results to neuralmagic testmo run'
+inputs:
+  testmo_url:
+    description: 'testmo URL'
+    required: true
+  testmo_token:
+    description: 'testmo token'
+    required: true
+  testmo_run_id:
+    description: 'testmo run id'
+    required: true
+  results:
+    description: "directory of JUnit '*.xml' formatted result files"
+    required: true
+  step_status:
+    description: 'status of reported step'
+    required: true
+outputs:
+  status:
+    description: "status of updating testmo. if there was no update, then 'success' is returned."
+    value: ${{ steps.submit_thread.outputs.status }}
+runs:
+  using: "composite"
+  steps:
+    - id: submit_thread
+      run: |
+        ls -al
+        ## if results is non-existent or there aren't results, then nothing to submit ...
+        REPORT=1
+        RESULTS=
+        if [[ ! -d ${{ inputs.results }} ]]; then
+          REPORT=0
+        else
+          RESULTS=$(find ${{ inputs.results }} -type f -name "*.xml")  # every JUnit xml file below the results directory
+        fi
+        if [[ -z "${RESULTS}" ]]; then
+          REPORT=0
+        fi
+        ## submit results? ('|| SUCCESS=$?' records a failure so 'status' is still written when bash runs with -e)
+        SUCCESS=0
+        if [ ${REPORT} -eq 1 ]; then
+          echo "submitting results to TESTMO run ..."
+          ## not checking testmo_url and token as this should be
+          ## called between "create" and "complete"
+          npx testmo automation:run:submit-thread \
+            --instance ${TESTMO_URL} \
+            --run-id ${TESTMO_RUN_ID} \
+            --results ${RESULTS} \
+            -- ./.github/scripts/step-status ${{ inputs.step_status }} \
+            || SUCCESS=$?
+        fi
+        echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"  # 0 when nothing was submitted or the submit succeeded
+        exit ${SUCCESS}
+      env:
+        TESTMO_URL: ${{ inputs.testmo_url }}
+        TESTMO_TOKEN: ${{ inputs.testmo_token }}
+        TESTMO_RUN_ID: ${{ inputs.testmo_run_id }}
+      shell: bash
diff --git a/.github/scripts/determine-threading b/.github/scripts/determine-threading
new file mode 100755
index 0000000000000..11354772a541b
--- /dev/null
+++ b/.github/scripts/determine-threading
@@ -0,0 +1,49 @@
+#!/bin/bash -e
+
+# Print the number of build threads to use so that each thread gets at
+# least the requested GiB of RAM. Always prints an integer >= 1.
+usage() {
+    echo "Usage: ${0} <options>"
+    echo
+    echo "  -G    - number of GiB per processor (includes hyperthreads, default is 1 GiB)."
+    echo "  -h    - this list of options"
+    echo
+    exit 1
+}
+
+Gi_PER_PROC=1
+
+while getopts "hG:" OPT; do
+    case "${OPT}" in
+        G) Gi_PER_PROC="${OPTARG}" ;;
+        *) usage ;;   # -h and any unknown option
+    esac
+done
+
+# -G must be a positive integer, otherwise the division below is invalid
+if ! [[ "${Gi_PER_PROC}" =~ ^[1-9][0-9]*$ ]]; then
+    usage
+fi
+
+# this includes hyperthreads, since we're only compiling code
+# ... not doing floating point calculations
+ALL_PROC=$(nproc --all)
+
+# MemTotal is reported in kB; convert to whole GiB (0 if it cannot be read)
+TOTAL_MEM=$(awk '/^MemTotal:/ {print $2}' /proc/meminfo 2>/dev/null || true)
+TOTAL_MEM=$(( ${TOTAL_MEM:-0} / 1048576 ))
+
+USE_PROC=$((TOTAL_MEM / Gi_PER_PROC))
+
+# never use more threads than there are processors
+if [ "${USE_PROC}" -gt "${ALL_PROC}" ]; then
+    USE_PROC=${ALL_PROC}
+fi
+
+# always use at least one thread, e.g. when total memory is unknown or
+# smaller than the requested GiB per processor
+if [ "${USE_PROC}" -lt 1 ]; then
+    USE_PROC=1
+fi
+
+echo "${USE_PROC}"
diff --git a/.github/scripts/step-status b/.github/scripts/step-status
new file mode 100755
index 0000000000000..b07f17517be2b
--- /dev/null
+++ b/.github/scripts/step-status
@@ -0,0 +1,14 @@
+#!/bin/bash -e
+
+# echo "green encased checkmark" if "${1} == 0"
+# echo "red X"                   if "${1} != 0"
+
+STEP_STATUS=${1}
+
+if [ $STEP_STATUS -eq 0 ]; then
+    # green check
+    echo -e "\xE2\x9C\x85"
+else
+    # red x
+    echo -e "\xE2\x9D\x8C"
+fi
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 392a92fa637a0..26a9b5cb89bcd 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -57,6 +57,94 @@ jobs:
                 ref: ${{ inputs.gitref }}
                 submodules: recursive
 
-            - name: hello world
+            - name: setenv
+              id: setenv
+              uses: ./.github/actions/nm-set-env/  # local composite action; writes env vars to GITHUB_ENV
+              with:
+                hf_home: ${{ secrets.NM_HF_HOME }}
+
+            - name: set python
+              id: set_python
+              uses: ./.github/actions/nm-set-python/  # creates the "<venv>-<short sha>" pyenv virtualenv
+              with:
+                python: ${{ inputs.python }}
+                venv: TEST  # must match the 'venv' given to the build and test steps below
+
+            # TODO: testmo source is currently hardcoded.
+            - name: create testmo run
+              id: create_testmo_run
+              uses: ./.github/actions/nm-testmo-run-create/
+              if: success() || failure()  # also run when an earlier step failed
+              with:
+                testmo_url: https://neuralmagic.testmo.net
+                testmo_token: ${{ secrets.TESTMO_TEST_TOKEN }}
+                source: 'build-test'
+
+            - name: python lint
+              id: lint
+              uses: ./.github/actions/nm-lint-python/  # runs 'ruff' and exposes its exit code as 'status'
+
+            - name: build
+              id: build
+              uses: ./.github/actions/nm-build-vllm/  # 'pip install -e .' inside the venv; exit code exposed as 'status'
+              with:
+                Gi_per_thread: 1
+                python: ${{ inputs.python }}
+                venv: TEST
+
+            - name: test
+              id: test
+              uses: ./.github/actions/nm-test-vllm/  # runs pytest inside the venv; exit code exposed as 'status'
+              with:
+                test_directory: tests
+                test_xml: test-results/all_tests.xml  # written under the 'results' directory submitted below
+                python: ${{ inputs.python }}
+                venv: TEST
+
+            - name: report test results
+              id: report_test
+              uses: ./.github/actions/nm-testmo-run-submit-thread/
+              if: success() || failure()  # also run when an earlier step failed
+              with:
+                testmo_url: https://neuralmagic.testmo.net
+                testmo_token: ${{ secrets.TESTMO_TEST_TOKEN }}
+                testmo_run_id: ${{ steps.create_testmo_run.outputs.id }}
+                results: test-results
+                step_status: ${{ steps.test.outputs.status }}
+
+            - name: summary
+              uses: ./.github/actions/nm-run-summary/  # writes the markdown run summary to GITHUB_STEP_SUMMARY
+              if: success() || failure()  # also run when an earlier step failed
+              with:
+                label: ${{ inputs.label }}
+                gitref: ${{ inputs.gitref }}
+                testmo_run_url: https://neuralmagic.testmo.net/automation/runs/view/${{ steps.create_testmo_run.outputs.id }}
+                python: ${{ steps.set_python.outputs.version }}
+                lint_status: ${{ steps.lint.outputs.status }}
+                build_status: ${{ steps.build.outputs.status }}
+                test_status: ${{ steps.test.outputs.status }}
+
+            - name: run status
+              id: run_status
+              if: success() || failure()  # also run when an earlier step failed
+              env:
+                CHECKOUT: ${{ steps.checkout.outcome }}
+                LINT_STATUS: ${{ steps.lint.outputs.status }}
+                BUILD_STATUS: ${{ steps.build.outputs.status }}
+                TEST_STATUS: ${{ steps.test.outputs.status }}
               run: |
-                echo "HELLO WORLD" >> $GITHUB_STEP_SUMMARY
+                  echo "checkout status: ${CHECKOUT}"
+                  if [[ "${CHECKOUT}" != *"success"* ]]; then exit 1; fi
+                  if [ "${LINT_STATUS:-1}" -ne 0 ]; then exit 1; fi  # an empty status (step skipped or aborted) counts as failure
+                  if [ "${BUILD_STATUS:-1}" -ne 0 ]; then exit 1; fi
+                  echo "build status: ${BUILD_STATUS}"
+                  if [ "${TEST_STATUS:-1}" -ne 0 ]; then exit 1; fi
+                  echo "test status: ${TEST_STATUS}"
+
+            - name: complete testmo run
+              uses: ./.github/actions/nm-testmo-run-complete/  # marks the testmo run created earlier in this job as complete
+              if: success() || failure()  # also run when an earlier step failed
+              with:
+                testmo_url: https://neuralmagic.testmo.net
+                testmo_token: ${{ secrets.TESTMO_TEST_TOKEN }}
+                testmo_run_id: ${{ steps.create_testmo_run.outputs.id }}
diff --git a/.github/workflows/remote-push.yml b/.github/workflows/remote-push.yml
new file mode 100644
index 0000000000000..c10b386ceb23e
--- /dev/null
+++ b/.github/workflows/remote-push.yml
@@ -0,0 +1,30 @@
+name: remote push
+run-name: ${{ github.actor }} verifying branch '${{ github.ref }}'
+on:
+  push:
+    branches-ignore:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
+  cancel-in-progress: true
+
+jobs:
+
+    # TODO: expand python matrix later, once CI system has
+    #       matured.
+    # TODO: adjust timeout after we get a bit more experience.
+    #       making it 60 is a bit permissive.
+
+    # versions are quoted so YAML never reads one as a number (e.g. 3.10 -> 3.1)
+    AWS-AVX2-32G-A10G-24G:
+        strategy:
+            matrix:
+                python: ['3.10.12']
+        uses: ./.github/workflows/build-test.yml
+        with:
+            label: aws-avx2-32G-a10g-24G
+            timeout: 60
+            gitref: '${{ github.ref }}'
+            python: ${{ matrix.python }}
+        secrets: inherit
diff --git a/.gitignore b/.gitignore
index b5195629e5cf3..9a2948ae01a60 100644
--- a/.gitignore
+++ b/.gitignore
@@ -184,3 +184,7 @@ _build/
 
 # Benchmark dataset
 *.json
+
+# pyenv
+.python-version
+
diff --git a/requirements-dev.txt b/requirements-dev.txt
index f8126008d0794..cbf099ab73f50 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,7 +1,7 @@
 # formatting
-yapf==0.32.0
-toml==0.10.2
 ruff==0.1.5
+toml==0.10.2
+yapf==0.32.0
 
 # type checking
 mypy==0.991
@@ -10,12 +10,12 @@ types-requests
 types-setuptools
 
 # testing
-pytest
-pytest-forked
-pytest-asyncio
-httpx
 einops # required for MPT
 flash_attn # required for HuggingFace's llama implementation
+httpx
 openai
+pytest
+pytest-asyncio
+pytest-forked
+ray
 requests
-ray
\ No newline at end of file
diff --git a/requirements-neuron.txt b/requirements-neuron.txt
index 3f30ed08f037d..da9c2de767af1 100644
--- a/requirements-neuron.txt
+++ b/requirements-neuron.txt
@@ -1,9 +1,9 @@
-sentencepiece  # Required for LLaMA tokenizer.
+aioprometheus[starlette]
+fastapi
+neuronx-cc
 numpy
-transformers-neuronx >= 0.9.0
+pydantic >= 2.0  # Required for OpenAI server.
+sentencepiece  # Required for LLaMA tokenizer.
 torch-neuronx >= 2.1.0
-neuronx-cc
-fastapi
+transformers-neuronx >= 0.9.0
 uvicorn[standard]
-pydantic >= 2.0  # Required for OpenAI server.
-aioprometheus[starlette]
diff --git a/requirements-rocm.txt b/requirements-rocm.txt
index a846f929ef226..4a5a937f21030 100644
--- a/requirements-rocm.txt
+++ b/requirements-rocm.txt
@@ -1,13 +1,13 @@
+aioprometheus[starlette]
+fastapi
 ninja  # For faster builds.
-typing-extensions>=4.8.0
-starlette
+numpy
 psutil
+pydantic >= 2.0  # Required for OpenAI server.
 ray >= 2.5.1
 sentencepiece  # Required for LLaMA tokenizer.
-numpy
+starlette
 tokenizers>=0.15.0
 transformers >= 4.37.0  # Required for Mixtral.
-fastapi
+typing-extensions>=4.8.0
 uvicorn[standard]
-pydantic >= 2.0  # Required for OpenAI server.
-aioprometheus[starlette]
diff --git a/requirements.txt b/requirements.txt
index 299bad38fbf8a..ea0beaeeb91e4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,12 +1,13 @@
+aioprometheus[starlette]
+fastapi
+# magic_wand == 0.0.1
 ninja  # For faster builds.
+numpy
 psutil
+pydantic >= 2.0  # Required for OpenAI server.
 ray >= 2.5.1
 sentencepiece  # Required for LLaMA tokenizer.
-numpy
 torch == 2.1.2
 transformers >= 4.37.0 # Required for Qwen2
-xformers == 0.0.23.post1  # Required for CUDA 12.1.
-fastapi
 uvicorn[standard]
-pydantic >= 2.0  # Required for OpenAI server.
-aioprometheus[starlette]
+xformers == 0.0.23.post1  # Required for CUDA 12.1.
diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py
index 83316fcb7469d..d1c7b5b99d7aa 100644
--- a/tests/models/test_mistral.py
+++ b/tests/models/test_mistral.py
@@ -9,6 +9,7 @@
 ]
 
 
+@pytest.mark.skip("running these on a10g results in process getting killed")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["bfloat16"])
 @pytest.mark.parametrize("max_tokens", [128])
diff --git a/tests/models/test_models.py b/tests/models/test_models.py
index 40858a517b311..137d85e459785 100644
--- a/tests/models/test_models.py
+++ b/tests/models/test_models.py
@@ -13,6 +13,7 @@
 ]
 
 
+@pytest.mark.skip("running these on a10g results in process getting killed")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["float"])
 @pytest.mark.parametrize("max_tokens", [128])