Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: upsert not working because sqlite3 version #3425

Merged
merged 16 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions .github/workflows/build-push-docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Reusable workflow (triggered through `workflow_call`) that builds a Docker
# image with Buildx from the `docker` build context, pushes it, and exposes the
# image version computed by docker/metadata-action as the `version` output.
#
# NOTE(review): the steps read `secrets.AR_DOCKER_USERNAME` and
# `secrets.AR_DOCKER_PASSWORD`, but no secrets are declared under
# `workflow_call`, so callers are presumably expected to pass
# `secrets: inherit` - confirm against the calling workflows.
name: Build Argilla Docker image

on:
  workflow_call:
    inputs:
      download-python-package:
        description: "True if python package should be downloaded"
        required: false
        type: boolean
        default: false
      image-name:
        description: "Name of the image to build"
        required: true
        type: string
      dockerfile:
        description: "Path to the Dockerfile to build"
        required: true
        type: string
      platforms:
        description: "Platforms to build for"
        required: true
        type: string
      build-args:
        description: "Build arguments"
        required: false
        type: string
        default: ""
      readme:
        description: "Path to the README file"
        required: false
        type: string
        default: "README.md"
    outputs:
      version:
        description: "Version of the Docker image"
        value: ${{ jobs.build.outputs.version }}

jobs:
  build:
    name: Build Docker image
    runs-on: ubuntu-latest
    outputs:
      # Re-exported at workflow level so callers can read it from `needs.<job>.outputs.version`.
      version: ${{ steps.meta.outputs.version }}
    steps:
      - uses: actions/checkout@v3
      # Only fetch the `python-package` artifact when the caller asks for it;
      # it is placed inside the Docker build context (`docker/dist`).
      - name: Download python package
        uses: actions/download-artifact@v3
        if: ${{ inputs.download-python-package }}
        with:
          name: python-package
          path: docker/dist
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      # Produces the tags, labels and version consumed by the steps below.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ inputs.image-name }}
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.AR_DOCKER_USERNAME }}
          password: ${{ secrets.AR_DOCKER_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v4
        with:
          context: docker
          file: ${{ inputs.dockerfile }}
          platforms: ${{ inputs.platforms }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: ${{ inputs.build-args }}
          push: true
      # The Docker Hub repository description is only refreshed on releases.
      - name: Docker Hub Description
        uses: peter-evans/dockerhub-description@v3
        if: github.event_name == 'release'
        with:
          username: ${{ secrets.AR_DOCKER_USERNAME }}
          password: ${{ secrets.AR_DOCKER_PASSWORD }}
          repository: ${{ inputs.image-name }}
          readme-filepath: ${{ inputs.readme }}
86 changes: 29 additions & 57 deletions .github/workflows/package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,9 @@ jobs:
if: needs.deployable_check.outputs.isDeployable == 'true'
secrets: inherit

deploy_docker:
name: Build docker image
runs-on: ubuntu-latest
build_server_docker_image:
name: Build Argilla server docker image
uses: ./.github/workflows/build-push-docker.yml
needs:
- build_python_package
- run_tests
Expand All @@ -132,59 +132,31 @@ jobs:
needs.deployable_check.outputs.isDeployable == 'true' &&
needs.run_tests.result != 'failure' &&
needs.run_tests_extra.result != 'failure'
strategy:
matrix:
include:
- image: argilla/argilla-server
dockerfile: docker/Dockerfile
readme: README.md
platforms: linux/amd64
- image: argilla/argilla-quickstart
dockerfile: docker/quickstart.Dockerfile
readme: docker/quickstart.README.md
platforms: linux/amd64,linux/arm64
defaults:
run:
shell: bash -l {0}
steps:
- name: Checkout Code 🛎
uses: actions/checkout@v2
- name: Download python package
uses: actions/download-artifact@v3
with:
name: python-package
path: docker/dist
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Docker meta
id: meta
uses: crazy-max/ghaction-docker-meta@v2
with:
images: ${{ matrix.image }}
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.AR_DOCKER_USERNAME }}
password: ${{ secrets.AR_DOCKER_PASSWORD }}
- name: Build & push Docker image
uses: docker/build-push-action@v2
with:
context: docker
file: ${{ matrix.dockerfile }}
platforms: ${{ matrix.platforms }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
push: true
- name: Docker Hub Description
uses: peter-evans/dockerhub-description@v3
if: github.event_name == 'release'
with:
username: ${{ secrets.AR_DOCKER_USERNAME }}
password: ${{ secrets.AR_DOCKER_PASSWORD }}
repository: ${{ matrix.image }}
readme-filepath: ${{ matrix.readme }}
with:
download-python-package: true
image-name: argilla/argilla-server
dockerfile: docker/Dockerfile
readme: README.md
platforms: linux/amd64,linux/arm64
secrets: inherit

build_quickstart_docker_image:
name: Build Argilla quickstart docker image
uses: ./.github/workflows/build-push-docker.yml
needs: build_server_docker_image
if: |
always() &&
needs.build_server_docker_image.result == 'success'
with:
download-python-package: false
image-name: argilla/argilla-quickstart
dockerfile: docker/quickstart.Dockerfile
readme: docker/quickstart.README.md
platforms: linux/amd64,linux/arm64
build-args: |
ARGILLA_VERSION=${{ needs.build_server_docker_image.outputs.version }}
secrets: inherit

# This job will upload a Python Package using Twine when a release is created
# For more information see:
# https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
Expand All @@ -193,7 +165,7 @@ jobs:
runs-on: ubuntu-latest
if: ${{ github.event_name == 'release' }}
needs:
- deploy_docker
- build_quickstart_docker_image
defaults:
run:
shell: bash -l {0}
Expand Down
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ These are the section headers that we use:
- Added API and Python Client support for workspace deletion (Closes [#3260](https://github.com/argilla-io/argilla/issues/3260))
- Added `GET /api/v1/me/workspaces` endpoint to list the workspaces of the current active user ([#3390](https://github.com/argilla-io/argilla/pull/3390))


### Changed

- Updated output payload for `GET /api/v1/datasets/{dataset_id}/records`, `GET /api/v1/me/datasets/{dataset_id}/records`, `POST /api/v1/me/datasets/{dataset_id}/records/search` endpoints to include the suggestions of the records based on the value of the `include` query parameter ([#3304](https://github.com/argilla-io/argilla/pull/3304)).
Expand All @@ -43,12 +42,13 @@ These are the section headers that we use:
- `User.workspaces` is no longer an attribute but a property, and is calling `list_user_workspaces` to list all the workspace names for a given user ID ([#3334](https://github.com/argilla-io/argilla/pull/3334))
- Renamed `FeedbackDatasetConfig` to `DatasetConfig` and export/import from YAML as default instead of JSON (just used internally on `push_to_huggingface` and `from_huggingface` methods of `FeedbackDataset`) ([#3326](https://github.com/argilla-io/argilla/pull/3326)).
- The protected metadata fields now support values other than textual info; existing datasets must be reindexed. See [docs](https://docs.argilla.io/en/latest/getting_started/installation/configurations/database_migrations.html#elasticsearch) for more details (Closes [#3332](https://github.com/argilla-io/argilla/issues/3332)).
- Updated `Dockerfile` parent image from `python:3.9.16-slim` to `python:3.10.12-slim` ([#3425](https://github.com/argilla-io/argilla/pull/3425)).
- Updated `quickstart.Dockerfile` parent image from `elasticsearch:8.5.3` to `argilla/argilla-server:${ARGILLA_VERSION}` ([#3425](https://github.com/argilla-io/argilla/pull/3425)).

### Removed

- Removed support for non-prefixed environment variables. All valid env vars start with `ARGILLA_` (See [#3392](https://github.com/argilla-io/argilla/pull/3392)).


### Fixed

- Fixed `GET /api/v1/me/datasets/{dataset_id}/records` endpoint always returning the responses for the records even if `responses` was not provided via the `include` query parameter ([#3304](https://github.com/argilla-io/argilla/pull/3304)).
Expand Down
6 changes: 3 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9.16-slim
FROM python:3.10.12-slim

# Environment Variables
ENV ARGILLA_HOME_PATH=/var/lib/argilla
Expand All @@ -15,10 +15,10 @@ COPY scripts/start_argilla_server.sh /
COPY dist/*.whl /packages/

RUN apt-get update && \
apt-get install -y python-dev libpq-dev gcc && \
apt-get install -y libpq-dev gcc && \
chmod +x /start_argilla_server.sh && \
for wheel in /packages/*.whl; do pip install "$wheel"[server,postgresql]; done && \
apt-get remove -y python-dev libpq-dev gcc && \
apt-get remove -y libpq-dev gcc && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
rm -rf /packages
Expand Down
8 changes: 8 additions & 0 deletions docker/config/elasticsearch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Elasticsearch settings for a single-node cluster with security switched off.
cluster.name: docker-cluster
# Bind to every network interface.
network.host: "0.0.0.0"
# Directory where Elasticsearch keeps its data.
path.data: /data/elasticsearch
discovery.type: single-node
# X-Pack security is disabled, including TLS on the transport and HTTP layers.
xpack.security.enabled: false
xpack.security.transport.ssl.enabled: false
xpack.security.http.ssl.enabled: false
# Do not apply the disk-usage threshold when allocating shards.
cluster.routing.allocation.disk.threshold_enabled: false
68 changes: 42 additions & 26 deletions docker/quickstart.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,35 +1,54 @@
FROM docker.elastic.co/elasticsearch/elasticsearch:8.5.3

ENV DEBIAN_FRONTEND=noninteractive
# TODO(gabrielmbmb): update this `Dockerfile` to multi-staged build to reduce the image size
ARG ARGILLA_VERSION=latest
FROM argilla/argilla-server:${ARGILLA_VERSION}

USER root

# Create a directory where Elasticsearch and Argilla will store their data
# We will use this directory as a volume to persist data between container restarts (mainly in HF spaces)
RUN mkdir /data
RUN chown -R elasticsearch:elasticsearch /data
RUN apt-get update && apt-get install -y \
apt-transport-https \
gnupg \
wget

# Install Elasticsearch signing key
RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elasticsearch-keyring.gpg

# Add Elasticsearch repository
RUN echo "deb [signed-by=/usr/share/keyrings/elasticsearch-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee /etc/apt/sources.list.d/elastic-8.x.list

# Copy Argilla distribution files
COPY scripts/* /
COPY quickstart.requirements.txt /packages/requirements.txt
COPY dist/*.whl /packages/

RUN apt update && \
apt install -y curl git python3.9 python3.9-dev python3.9-distutils gcc gnupg apache2-utils sudo openssl systemctl && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python3.9 get-pip.py \
# Install Argilla
&& pip3 install -r /packages/requirements.txt && \

RUN \
# Indicate that this is a quickstart deployment
echo -e "{ \"deployment\": \"quickstart\" }" > /usr/local/lib/python3.10/site-packages/argilla/server/static/deployment.json && \
# Create a user to run the Argilla server and Elasticsearch
useradd -ms /bin/bash argilla && \
# Create a directory where Elasticsearch and Argilla will store their data
mkdir /data && \
# Install Elasticsearch and configure it
apt-get update && apt-get install -y elasticsearch=8.8.2 && \
chown -R argilla:argilla /usr/share/elasticsearch /etc/elasticsearch /var/lib/elasticsearch /var/log/elasticsearch && \
frascuchon marked this conversation as resolved.
Show resolved Hide resolved
chown argilla:argilla /etc/default/elasticsearch && \
# Install quickstart image dependencies
pip install -r /packages/requirements.txt && \
chmod +x /start_quickstart_argilla.sh && \
for wheel in /packages/*.whl; do pip install "$wheel"[server]; done && \
rm -rf /packages && \
rm -rf /var/lib/apt/lists/* \
# This line add context to this image. This solution should be improved
&& echo -e "{ \"deployment\": \"quickstart\" }" \
> /usr/local/lib/python3.9/dist-packages/argilla/server/static/deployment.json
# Give ownership of the data directory to the argilla user
chown -R argilla:argilla /data && \
# Clean up
apt-get remove -y wget gnupg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
rm -rf /packages

COPY config/elasticsearch.yml /etc/elasticsearch/elasticsearch.yml

# echo -e "{ \"deployment\": \"quickstart\" }" \
# > /usr/local/lmib/python/dist-packages/argilla/server/static/deployment.json

USER elasticsearch
USER argilla

RUN echo "path.data: /data/elasticsearch" >> /usr/share/elasticsearch/config/elasticsearch.yml
ENV ELASTIC_CONTAINER=true

ENV OWNER_USERNAME=owner
ENV OWNER_PASSWORD=12345678
Expand All @@ -47,9 +66,6 @@ ENV ARGILLA_WORKSPACE=$ADMIN_USERNAME
ENV LOAD_DATASETS=full
ENV UVICORN_PORT=6900

ENV xpack.security.enabled=false
ENV cluster.routing.allocation.disk.threshold_enabled=false
ENV discovery.type=single-node
ENV ES_JAVA_OPTS=-'Xms512m -Xmx512m'

CMD ["/start_quickstart_argilla.sh"]
12 changes: 6 additions & 6 deletions docker/scripts/start_quickstart_argilla.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
set -e

echo "Starting Elasticsearch"
elasticsearch 1>/dev/null 2>/dev/null &
/usr/share/elasticsearch/bin/elasticsearch 1>/dev/null 2>/dev/null &

echo "Waiting for elasticsearch to start"
sleep 30

echo "Running database migrations"
python3.9 -m argilla database migrate
python -m argilla database migrate

echo "Creating owner user"
python3.9 -m argilla users create \
python -m argilla users create \
--first-name "Owner" \
--username "$OWNER_USERNAME" \
--password "$OWNER_PASSWORD" \
Expand All @@ -21,7 +21,7 @@ python3.9 -m argilla users create \
--workspace "$ARGILLA_WORKSPACE"

echo "Creating admin user"
python3.9 -m argilla users create \
python -m argilla users create \
--first-name "Admin" \
--username "$ADMIN_USERNAME" \
--password "$ADMIN_PASSWORD" \
Expand All @@ -30,15 +30,15 @@ python3.9 -m argilla users create \
--workspace "$ARGILLA_WORKSPACE"

echo "Creating annotator user"
python3.9 -m argilla users create \
python -m argilla users create \
--first-name "Annotator" \
--username "$ANNOTATOR_USERNAME" \
--password "$ANNOTATOR_PASSWORD" \
--role annotator \
--workspace "$ARGILLA_WORKSPACE"

# Load data
python3.9 /load_data.py "$OWNER_API_KEY" "$LOAD_DATASETS" &
python /load_data.py "$OWNER_API_KEY" "$LOAD_DATASETS" &

# Start Argilla
echo "Starting Argilla"
Expand Down
Loading