Skip to content

Commit

Permalink
Merge branch 'main' into topic/06-05-feat_add_agent_count_in_scaling_…
Browse files Browse the repository at this point in the history
…group_resolver
  • Loading branch information
fregataa committed Jul 3, 2024
2 parents 55998be + df57a09 commit a58dfe7
Show file tree
Hide file tree
Showing 286 changed files with 3,356 additions and 1,636 deletions.
1 change: 1 addition & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ indent_style = space
indent_size = 4

[*.md]
max_line_length = 0
trim_trailing_whitespace = false

[*.rst]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/alembic-head-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:

jobs:
check-multiple-heads:
if: ${{ contains(github.event.pull_request.labels.*.name, 'require:db-migration') && github.event.pull_request.merged == false }}
if: ${{ contains(github.event.pull_request.labels.*.name, 'require:db-migration') && !contains(fromJSON('["flow:merge-queue", "flow:hotfix"]'), github.event.label.name) && github.event.pull_request.merged == false }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/backport.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
echo "${milestones[@]}"
for i in "${!milestones[@]}"; do
if ! git branch -r | grep -q "origin/${milestones[$i]}\$"; then
if ! git ls-remote --heads | grep -q "refs/heads/${milestones[$i]}\$"; then
unset 'milestones[$i]'
fi
done
Expand Down
14 changes: 8 additions & 6 deletions .github/workflows/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ concurrency:

jobs:
lint:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') && github.event.pull_request.merged == false }}
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') && !contains(fromJSON('["flow:merge-queue", "flow:hotfix"]'), github.event.label.name) && github.event.pull_request.merged == false }}
runs-on: ubuntu-latest
steps:
- name: Calculate the fetch depth
Expand Down Expand Up @@ -79,7 +79,7 @@ jobs:


typecheck:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') && github.event.pull_request.merged == false }}
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') && !contains(fromJSON('["flow:merge-queue", "flow:hotfix"]'), github.event.label.name) && github.event.pull_request.merged == false }}
runs-on: ubuntu-latest
steps:
- name: Calculate the fetch depth
Expand Down Expand Up @@ -142,7 +142,7 @@ jobs:


test:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') && github.event.pull_request.merged == false }}
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') && !contains(fromJSON('["flow:merge-queue", "flow:hotfix"]'), github.event.label.name) && github.event.pull_request.merged == false }}
runs-on: [ubuntu-latest-8-cores]
steps:
- name: Calculate the fetch depth
Expand Down Expand Up @@ -295,7 +295,7 @@ jobs:
name: local-proxy-${{ matrix.os }}
path: dist-local-proxy/*
- name: Bootstrap Pants
uses: pantsbuild/actions/init-pants@v5-scie-pants
uses: pantsbuild/actions/init-pants@v8
with:
gha-cache-key: pants-cache-main-1-deploy-py${{ env.PROJECT_PYTHON_VERSION }}-${{ runner.os }}-${{ runner.arch }}
named-caches-hash: ${{ hashFiles('python*.lock', 'tools/*.lock') }}
Expand Down Expand Up @@ -353,7 +353,7 @@ jobs:
run: |
pip install -U 'packaging>=21.3'
- name: Bootstrap Pants
uses: pantsbuild/actions/init-pants@v5-scie-pants
uses: pantsbuild/actions/init-pants@v8
with:
gha-cache-key: pants-cache-main-1-deploy-py${{ env.PROJECT_PYTHON_VERSION }}-${{ runner.os }}-${{ runner.arch }}
named-caches-hash: ${{ hashFiles('python*.lock', 'tools/*.lock') }}
Expand Down Expand Up @@ -401,6 +401,8 @@ jobs:
needs: [build-scies, build-wheels, build-sbom]
if: github.event_name == 'push' && contains(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
permissions:
contents: write
environment: deploy-to-pypi
steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -448,7 +450,7 @@ jobs:
name: SBOM report
path: dist
- name: Release to GitHub
uses: softprops/action-gh-release@v1
uses: softprops/action-gh-release@v2
with:
body_path: "CHANGELOG_RELEASE.md"
prerelease: ${{ env.IS_PRERELEASE }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs-preview.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ permissions:

jobs:
docs-preview-links-:
if: ${{ contains(github.event.pull_request.labels.*.name, 'area:docs') && github.event.pull_request.merged == false }}
if: ${{ contains(github.event.pull_request.labels.*.name, 'area:docs') && !contains(fromJSON('["flow:merge-queue", "flow:hotfix"]'), github.event.label.name) && github.event.pull_request.merged == false }}
runs-on: ubuntu-latest
steps:
- name: Make a link to the doc preview build (en)
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/timeline-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ on:
pull_request:
types: [labeled, unlabeled, opened, synchronize, reopened]
merge_group:
permissions:
contents: write

jobs:
pr-number-assign:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:changelog') && github.event.pull_request.number != null && github.event.pull_request.merged == false }}
if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:changelog') && !contains(fromJSON('["flow:merge-queue", "flow:hotfix"]'), github.event.label.name) && github.event.pull_request.number != null && github.event.pull_request.merged == false }}
uses: ./.github/workflows/pr-number-assign.yml
secrets:
WORKFLOW_PAT: ${{ secrets.WORKFLOW_PAT }}
Expand Down
11 changes: 9 additions & 2 deletions .github/workflows/update-api-schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ on:
jobs:
graphql-updated:
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Calculate the fetch depth
run: |
Expand All @@ -20,13 +22,14 @@ jobs:
with:
fetch-depth: ${{ env.GIT_FETCH_DEPTH }}
ref: ${{ github.head_ref }}
token: ${{ secrets.OCTODOG }}
- name: Extract Python version from pants.toml
run: |
PYTHON_VERSION=$(grep -m 1 -oP '(?<=CPython==)([^"]+)' pants.toml)
echo "PANTS_CONFIG_FILES=pants.ci.toml" >> $GITHUB_ENV
echo "PROJECT_PYTHON_VERSION=$PYTHON_VERSION" >> $GITHUB_ENV
- name: Set up Python as Runtime
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ env.PROJECT_PYTHON_VERSION }}
- name: Set up remote cache backend (if applicable)
Expand Down Expand Up @@ -68,9 +71,13 @@ jobs:
needs: graphql-updated
name: Check Schema
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
checks: write
steps:
- uses: actions/checkout@v4
- uses: kamilkisiela/graphql-inspector@release-1689086705050
- uses: kamilkisiela/graphql-inspector@release-1717403590269
with:
schema: 'main:src/ai/backend/manager/api/schema.graphql'
rules: |
Expand Down
1 change: 1 addition & 0 deletions changes/1832.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Ensure that a session's occupying resources are updated in the DB when a kernel starts.
1 change: 1 addition & 0 deletions changes/2041.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
New redis client (experimental)
1 change: 1 addition & 0 deletions changes/2128.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix wrong SQL query build for GQL Relay node
1 change: 1 addition & 0 deletions changes/2161.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix buggy resolver of `model_card` GQL Query.
1 change: 1 addition & 0 deletions changes/2205.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Ensure that utilization idleness is checked after a set period.
1 change: 1 addition & 0 deletions changes/2220.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for CentOS 8 based kernels
1 change: 1 addition & 0 deletions changes/2250.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix GraphQL to support query to non-installed images
1 change: 1 addition & 0 deletions changes/2255.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow modifying model service session's environment variable setup
1 change: 1 addition & 0 deletions changes/2256.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `endpoint.runtime_variant` column
1 change: 1 addition & 0 deletions changes/2258.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add new API to show list of supported inference runtimes
1 change: 1 addition & 0 deletions changes/2260.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for model service provisioning without `model-definition.yaml`
1 change: 1 addition & 0 deletions changes/2287.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Rename no-op `access_key` parameter of `endpoint_list` GQL Query to `user_uuid`
1 change: 1 addition & 0 deletions changes/2288.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix `ai.backend.service-ports` label syntax broken when image does not expose built-in service port
1 change: 1 addition & 0 deletions changes/2289.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve stability of `untag_image_from_registry` mutation
1 change: 1 addition & 0 deletions changes/2290.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix SSH not working between kernels started with a customized image
1 change: 1 addition & 0 deletions changes/2291.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix invalid container memory capacity being reported
1 change: 1 addition & 0 deletions changes/2318.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix model service sessions created before 24.03.5 failing to spawn
1 change: 1 addition & 0 deletions changes/2319.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix image commit not working
1 change: 1 addition & 0 deletions changes/2320.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix the model service session scheduler (`scale_services()`) failing when sessions bound to an active route are already marked as terminated
1 change: 1 addition & 0 deletions changes/2321.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix container metric collection halted on systems with Cgroups v1
29 changes: 29 additions & 0 deletions docs/release-notes/24.03.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# What's New

## Neo WebUI

This version introduces a next-generation WebUI preview with dark-mode support.
You may toggle the switch in the session list view to use the "neo" session list and launcher,
which provide a more streamlined UI.


## TUI Installer

From this release, we ship an open-source version of the TUI installer as a self-contained single-binary executable.
This installer is packaged using [the science project](https://github.com/a-scie) and provides a terminal-based user interface (TUI) for easier setup.


## Model Store

This release introduces a globally shared, predefined "model-store" project from which users may clone model vfolders to their accounts.


## VFolder Trash Bin

When users delete a vfolder, it is now sent to the trash bin instead of immediately removing all its contents.
This allows users or admins to undo accidental deletions, and decouples the storage backend's directory removal process from the vfolder management system.


## User-defined Image Commit

When allowed by the administrator, users may commit a running session's main container as a new container image.
47 changes: 47 additions & 0 deletions docs/release-notes/24.09.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# What's New

## Model Serving

(TODO)
<!-- auto-scaling -->


## Model Store

This release upgrades the model store with the metadata browser and improved UI.
Users can now import and run a model directly from configured external model repository services like HuggingFace.


## NVIDIA NIM Integration

If users have a valid license for NVIDIA NIM, they can launch a NIM container with one click as a model service.


## Fine-grained Access Control

(TODO)
<!--
Now projects may have their own administrators who can add or remove users belonging to the project and project-owned vfolders without manual intervention by the super-administrator.
Under the hood, the storage management subsystem has adopted a fine-grained RBAC to declare and query the privilege for a user to perform a specific action on a target object,
allowing more fine-grained customization of storage access policies for large enterprises.
-->


## FastTrack

(TODO)
<!-- project-wide collaboration -->


## Neo WebUI

This version continues the transition to the next-generation WebUI.
(TODO: more details)


<!--
## Cancellable Image Pulling
Users can now trigger and cancel the session creation process and the required image pulling process separately,
helping the users to manage sessions stuck in the "PULLING" status due to various reasons (NFS mount failures, a broken Docker daemon, etc.).
-->
1 change: 1 addition & 0 deletions pants.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ use_rust_parser = true

[python-repos]
indexes = ["https://dist.backend.ai/pypi/simple/", "https://pypi.org/simple/"]
find_links = ["file://%(buildroot)s/wheelhouse"]

[python.resolves]
python-default = "python.lock"
Expand Down
13 changes: 12 additions & 1 deletion scripts/agent/build-dropbear.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -e

arch=$(uname -m)
distros=("ubuntu18.04" "ubuntu20.04" "ubuntu22.04" "alpine3.8")
distros=("alpine3.8" "centos8.0" "ubuntu18.04" "ubuntu20.04" "ubuntu22.04")

ubuntu1804_builder_dockerfile=$(cat <<'EOF'
FROM ubuntu:18.04
Expand Down Expand Up @@ -31,6 +31,16 @@ RUN apk add --no-cache make gcc musl-dev
RUN apk add --no-cache autoconf automake zlib-dev
EOF
)
centos8_builder_dockerfile=$(cat <<'EOF'
FROM centos:centos8
RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*
RUN dnf install -y make gcc automake autoconf dnf-plugins-core
RUN dnf config-manager --set-enabled powertools
RUN dnf install -y zlib-static glibc-static libxcrypt-static
EOF
)


build_script=$(cat <<'EOF'
#! /bin/sh
Expand Down Expand Up @@ -67,6 +77,7 @@ echo "$ubuntu1804_builder_dockerfile" > "$SCRIPT_DIR/dropbear-builder.ubuntu18.0
echo "$ubuntu2004_builder_dockerfile" > "$SCRIPT_DIR/dropbear-builder.ubuntu20.04.dockerfile"
echo "$ubuntu2204_builder_dockerfile" > "$SCRIPT_DIR/dropbear-builder.ubuntu22.04.dockerfile"
echo "$alpine_builder_dockerfile" > "$SCRIPT_DIR/dropbear-builder.alpine3.8.dockerfile"
echo "$centos8_builder_dockerfile" > "$SCRIPT_DIR/dropbear-builder.centos8.0.dockerfile"

for distro in "${distros[@]}"; do
docker build -t dropbear-builder:$distro \
Expand Down
19 changes: 15 additions & 4 deletions scripts/agent/build-sftpserver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
set -e

arch=$(uname -m)
distros=("ubuntu16.04" "ubuntu18.04" "ubuntu20.04" "centos7.6" "alpine3.8")
distros=("alpine3.8" "centos7.6" "centos8.0" "ubuntu16.04" "ubuntu18.04" "ubuntu20.04")

static_libs_dockerfile_part=$(cat <<'EOF'
ENV ZLIB_VER=1.2.11 \
ENV ZLIB_VER=1.3.1 \
SSL_VER=1.1.1i
RUN wget https://www.zlib.net/zlib-${ZLIB_VER}.tar.gz -O /root/zlib-${ZLIB_VER}.tar.gz && \
Expand Down Expand Up @@ -70,6 +70,16 @@ RUN yum install -y wget
EOF
)

centos8_builder_dockerfile=$(cat <<'EOF'
FROM centos:centos8
RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*
RUN dnf install -y make gcc
RUN dnf install -y autoconf
RUN dnf install -y wget
EOF
)

alpine_builder_dockerfile=$(cat <<'EOF'
FROM alpine:3.8
RUN apk add --no-cache make gcc musl-dev
Expand Down Expand Up @@ -107,6 +117,7 @@ echo -e "$ubuntu1604_builder_dockerfile\n$static_libs_dockerfile_part" > "$SCRIP
echo -e "$ubuntu1804_builder_dockerfile\n$static_libs_dockerfile_part" > "$SCRIPT_DIR/sftpserver-builder.ubuntu18.04.dockerfile"
echo -e "$ubuntu2004_builder_dockerfile\n$static_libs_dockerfile_part" > "$SCRIPT_DIR/sftpserver-builder.ubuntu20.04.dockerfile"
echo -e "$centos_builder_dockerfile\n$static_libs_dockerfile_part" > "$SCRIPT_DIR/sftpserver-builder.centos7.6.dockerfile"
echo -e "$centos8_builder_dockerfile\n$static_libs_dockerfile_part" > "$SCRIPT_DIR/sftpserver-builder.centos8.0.dockerfile"
echo -e "$alpine_builder_dockerfile\n$static_libs_dockerfile_part" > "$SCRIPT_DIR/sftpserver-builder.alpine3.8.dockerfile"

for distro in "${distros[@]}"; do
Expand All @@ -129,8 +140,8 @@ for distro in "${distros[@]}"; do
done

ls -l .
cp sftp-server.*.bin $SCRIPT_DIR/../src/ai/backend/runner
cp scp.*.bin $SCRIPT_DIR/../src/ai/backend/runner
cp sftp-server.*.bin $SCRIPT_DIR/../../src/ai/backend/runner
cp scp.*.bin $SCRIPT_DIR/../../src/ai/backend/runner

cd $SCRIPT_DIR/..
rm -rf "$temp_dir"
13 changes: 11 additions & 2 deletions scripts/agent/build-suexec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
set -e

arch=$(uname -m)
# distros=("ubuntu16.04" "ubuntu18.04" "ubuntu20.04" "centos7.6" "alpine3.8")
distros=("ubuntu16.04")

distros=("alpine3.8" "centos7.6" "centos8.0" "ubuntu16.04" "ubuntu18.04" "ubuntu20.04")

if [ $arch = "arm64" ]; then
arch="aarch64"
Expand Down Expand Up @@ -36,6 +36,14 @@ RUN yum install -y make gcc
EOF
)

centos8_builder_dockerfile=$(cat <<'EOF'
FROM centos:centos8
RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*
RUN dnf install -y make gcc
EOF
)

alpine_builder_dockerfile=$(cat <<'EOF'
FROM alpine:3.8
RUN apk add --no-cache make gcc musl-dev
Expand All @@ -61,6 +69,7 @@ echo "$ubuntu1604_builder_dockerfile" > "$SCRIPT_DIR/suexec-builder.ubuntu16.04.
echo "$ubuntu1804_builder_dockerfile" > "$SCRIPT_DIR/suexec-builder.ubuntu18.04.dockerfile"
echo "$ubuntu2004_builder_dockerfile" > "$SCRIPT_DIR/suexec-builder.ubuntu20.04.dockerfile"
echo "$centos_builder_dockerfile" > "$SCRIPT_DIR/suexec-builder.centos7.6.dockerfile"
echo "$centos8_builder_dockerfile" > "$SCRIPT_DIR/suexec-builder.centos8.0.dockerfile"
echo "$alpine_builder_dockerfile" > "$SCRIPT_DIR/suexec-builder.alpine3.8.dockerfile"

for distro in "${distros[@]}"; do
Expand Down
Loading

0 comments on commit a58dfe7

Please sign in to comment.