feat(api): Add Table.sample
#15698
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Backends | |
on: | |
push: | |
# Skip the backend suite if all changes are docs | |
paths-ignore: | |
- "docs/**" | |
- "**/*.md" | |
- "**/*.qmd" | |
- "codecov.yml" | |
- ".envrc" | |
branches: | |
- master | |
- "*.x.x" | |
pull_request: | |
# Skip the backend suite if all changes are docs | |
paths-ignore: | |
- "docs/**" | |
- "**/*.md" | |
- "**/*.qmd" | |
- "codecov.yml" | |
- ".envrc" | |
branches: | |
- master | |
- "*.x.x" | |
merge_group: | |
permissions: | |
# this allows extractions/setup-just to list releases for `just` at a higher | |
# rate limit while restricting GITHUB_TOKEN permissions elsewhere | |
contents: read | |
concurrency: | |
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} | |
cancel-in-progress: true | |
env: | |
FORCE_COLOR: "1" | |
jobs: | |
test_backends: | |
name: ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} | |
runs-on: ${{ matrix.os }} | |
env: | |
SQLALCHEMY_WARN_20: "1" | |
strategy: | |
fail-fast: false | |
matrix: | |
os: | |
- ubuntu-latest | |
- windows-latest | |
python-version: | |
- "3.9" | |
- "3.11" | |
backend: | |
- name: dask | |
title: Dask | |
extras: | |
- dask | |
- name: duckdb | |
title: DuckDB | |
extras: | |
- duckdb | |
- deltalake | |
additional_deps: | |
- torch | |
- name: pandas | |
title: Pandas | |
extras: | |
- pandas | |
- name: sqlite | |
title: SQLite | |
extras: | |
- sqlite | |
- name: datafusion | |
title: Datafusion | |
extras: | |
- datafusion | |
- name: polars | |
title: Polars | |
extras: | |
- polars | |
- deltalake | |
- name: mysql | |
title: MySQL | |
services: | |
- mysql | |
extras: | |
- mysql | |
- geospatial | |
sys-deps: | |
- libgeos-dev | |
- name: clickhouse | |
title: ClickHouse | |
services: | |
- clickhouse | |
extras: | |
- clickhouse | |
- name: postgres | |
title: PostgreSQL | |
extras: | |
- postgres | |
- geospatial | |
services: | |
- postgres | |
sys-deps: | |
- libgeos-dev | |
- name: postgres | |
title: PostgreSQL + Torch | |
extras: | |
- postgres | |
- geospatial | |
additional_deps: | |
- torch | |
services: | |
- postgres | |
sys-deps: | |
- libgeos-dev | |
- name: impala | |
title: Impala | |
serial: true | |
extras: | |
- impala | |
services: | |
- impala | |
- kudu | |
sys-deps: | |
- cmake | |
- ninja-build | |
- name: mssql | |
title: MS SQL Server | |
serial: true | |
extras: | |
- mssql | |
services: | |
- mssql | |
sys-deps: | |
- libkrb5-dev | |
- krb5-config | |
- freetds-dev | |
- name: trino | |
title: Trino | |
extras: | |
- trino | |
- postgres | |
services: | |
- trino | |
- name: druid | |
title: Druid | |
extras: | |
- druid | |
services: | |
- druid | |
- name: oracle | |
title: Oracle | |
serial: true | |
extras: | |
- oracle | |
services: | |
- oracle | |
- name: flink | |
title: Flink | |
serial: true | |
extras: | |
- flink | |
additional_deps: | |
- apache-flink | |
even_more_deps: | |
- pandas~=1.5 | |
exclude: | |
- os: windows-latest | |
backend: | |
name: mysql | |
title: MySQL | |
extras: | |
- mysql | |
- geospatial | |
services: | |
- mysql | |
sys-deps: | |
- libgeos-dev | |
- os: windows-latest | |
backend: | |
name: clickhouse | |
title: ClickHouse | |
extras: | |
- clickhouse | |
services: | |
- clickhouse | |
- os: windows-latest | |
backend: | |
name: postgres | |
title: PostgreSQL | |
extras: | |
- postgres | |
- geospatial | |
services: | |
- postgres | |
sys-deps: | |
- libgeos-dev | |
- os: windows-latest | |
backend: | |
name: postgres | |
title: PostgreSQL + Torch | |
extras: | |
- postgres | |
- geospatial | |
additional_deps: | |
- torch | |
services: | |
- postgres | |
sys-deps: | |
- libgeos-dev | |
- os: windows-latest | |
backend: | |
name: impala | |
title: Impala | |
serial: true | |
extras: | |
- impala | |
services: | |
- impala | |
- kudu | |
sys-deps: | |
- cmake | |
- ninja-build | |
- os: windows-latest | |
backend: | |
name: mssql | |
title: MS SQL Server | |
serial: true | |
extras: | |
- mssql | |
services: | |
- mssql | |
sys-deps: | |
- libkrb5-dev | |
- krb5-config | |
- freetds-dev | |
- os: windows-latest | |
backend: | |
name: trino | |
title: Trino | |
services: | |
- trino | |
extras: | |
- trino | |
- postgres | |
- os: windows-latest | |
backend: | |
name: druid | |
title: Druid | |
extras: | |
- druid | |
services: | |
- druid | |
- os: windows-latest | |
backend: | |
name: oracle | |
title: Oracle | |
serial: true | |
extras: | |
- oracle | |
services: | |
- oracle | |
- python-version: "3.11" | |
backend: | |
name: flink | |
title: Flink | |
serial: true | |
extras: | |
- flink | |
additional_deps: | |
- apache-flink | |
even_more_deps: | |
- pandas~=1.5 | |
steps: | |
- name: update and install system dependencies | |
if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null | |
run: | | |
set -euo pipefail | |
sudo apt-get update -qq -y | |
sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} | |
- name: install sqlite | |
if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite' | |
run: choco install sqlite | |
- name: checkout | |
uses: actions/checkout@v4 | |
- uses: extractions/setup-just@v1 | |
env: | |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
- name: download backend data | |
run: just download-data | |
- name: start services | |
if: matrix.backend.services != null | |
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} | |
- name: install python | |
uses: actions/setup-python@v4 | |
id: install_python | |
with: | |
python-version: ${{ matrix.python-version }} | |
- uses: syphar/restore-pip-download-cache@v1 | |
with: | |
requirement_files: poetry.lock | |
custom_cache_key_element: ${{ steps.install_python.outputs.python-version }} | |
- name: install poetry | |
run: python -m pip install --upgrade pip 'poetry==1.6.1' | |
- uses: syphar/restore-virtualenv@v1 | |
with: | |
requirement_files: poetry.lock | |
custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }} | |
- name: install ibis | |
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" | |
- name: install other deps | |
if: matrix.backend.additional_deps != null | |
run: poetry run pip install ${{ join(matrix.backend.additional_deps, ' ') }} | |
# FIXME(deepyaman) | |
- name: install even more deps | |
if: matrix.backend.even_more_deps != null | |
run: poetry run pip install ${{ join(matrix.backend.even_more_deps, ' ') }} | |
- name: show installed deps | |
run: poetry run pip list | |
- name: "run parallel tests: ${{ matrix.backend.name }}" | |
if: ${{ !matrix.backend.serial }} | |
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup | |
- name: "run serial tests: ${{ matrix.backend.name }}" | |
if: matrix.backend.serial && matrix.backend.name == 'impala' | |
run: just ci-check -m ${{ matrix.backend.name }} --randomly-dont-reorganize | |
env: | |
IBIS_TEST_NN_HOST: localhost | |
IBIS_TEST_IMPALA_HOST: localhost | |
IBIS_TEST_IMPALA_PORT: 21050 | |
IBIS_TEST_WEBHDFS_PORT: 50070 | |
IBIS_TEST_WEBHDFS_USER: hdfs | |
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} | |
- name: "run serial tests: ${{ matrix.backend.name }}" | |
if: matrix.backend.serial && matrix.backend.name == 'flink' | |
run: just ci-check -m ${{ matrix.backend.name }} ibis/backends/flink/tests | |
env: | |
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} | |
- name: "run serial tests: ${{ matrix.backend.name }}" | |
if: matrix.backend.serial && matrix.backend.name != 'impala' && matrix.backend.name != 'flink' | |
run: just ci-check -m ${{ matrix.backend.name }} | |
env: | |
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} | |
- name: check that no untracked files were produced | |
shell: bash | |
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . | |
- name: upload code coverage | |
if: success() | |
uses: codecov/codecov-action@v3 | |
with: | |
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} | |
- name: Show docker compose logs on fail | |
if: matrix.backend.services != null && failure() | |
run: docker compose logs | |
test_backends_min_version: | |
name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }} | |
runs-on: ${{ matrix.os }} | |
env: | |
SQLALCHEMY_WARN_20: "1" | |
strategy: | |
fail-fast: false | |
matrix: | |
os: | |
- ubuntu-latest | |
- windows-latest | |
python-version: | |
- "3.9" | |
- "3.11" | |
backend: | |
- name: dask | |
title: Dask | |
deps: | |
- "dask[array,dataframe]@2022.9.1" | |
- "pandas@1.5.3" | |
extras: | |
- dask | |
- name: postgres | |
title: PostgreSQL | |
deps: | |
- "psycopg2@2.8.4" | |
- "GeoAlchemy2@0.6.3" | |
- "geopandas@0.6" | |
- "Shapely@2" | |
services: | |
- postgres | |
extras: | |
- postgres | |
- geospatial | |
exclude: | |
- os: windows-latest | |
backend: | |
name: postgres | |
title: PostgreSQL | |
deps: | |
- "psycopg2@2.8.4" | |
- "GeoAlchemy2@0.6.3" | |
- "geopandas@0.6" | |
- "Shapely@2" | |
services: | |
- postgres | |
extras: | |
- postgres | |
- geospatial | |
- python-version: "3.11" | |
backend: | |
name: postgres | |
title: PostgreSQL | |
deps: | |
- "psycopg2@2.8.4" | |
- "GeoAlchemy2@0.6.3" | |
- "geopandas@0.6" | |
- "Shapely@2" | |
services: | |
- postgres | |
extras: | |
- postgres | |
- geospatial | |
steps: | |
- name: checkout | |
uses: actions/checkout@v4 | |
- name: install libgeos for shapely | |
if: matrix.backend.name == 'postgres' | |
run: sudo apt-get install -qq -y build-essential libgeos-dev | |
- uses: extractions/setup-just@v1 | |
env: | |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
- name: download backend data | |
run: just download-data | |
- name: start services | |
if: matrix.backend.services != null | |
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} | |
- name: install python | |
uses: actions/setup-python@v4 | |
id: install_python | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: install poetry | |
run: python -m pip install --upgrade pip 'poetry==1.6.1' | |
- name: install minimum versions | |
run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }} | |
- name: checkout the lock file | |
run: git checkout poetry.lock | |
- name: lock with no updates | |
# poetry add is aggressive and will update other dependencies like | |
# numpy and pandas so we keep the pyproject.toml edits and then relock | |
# without updating anything except the requested versions | |
run: poetry lock --no-update | |
- name: install ibis | |
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" | |
- name: run tests | |
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup | |
- name: check that no untracked files were produced | |
shell: bash | |
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . | |
- name: upload code coverage | |
if: success() | |
uses: codecov/codecov-action@v3 | |
with: | |
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} | |
- name: Show docker compose logs on fail | |
if: matrix.backend.services != null && failure() | |
run: docker compose logs | |
test_pyspark: | |
name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} pandas-${{ matrix.pandas.version }} | |
runs-on: ${{ matrix.os }} | |
strategy: | |
fail-fast: false | |
matrix: | |
os: | |
- ubuntu-latest | |
python-version: | |
- "3.10" | |
pandas: | |
- version: "1.5.*" | |
- version: "2.*.*" | |
conflicts: | |
- snowflake-sqlalchemy | |
- snowflake-connector-python | |
include: | |
- os: ubuntu-latest | |
python-version: "3.9" | |
pandas: | |
version: "1.5.*" | |
- os: ubuntu-latest | |
python-version: "3.11" | |
pandas: | |
version: "2.*.*" | |
conflicts: | |
- snowflake-sqlalchemy | |
- snowflake-connector-python | |
steps: | |
- name: checkout | |
uses: actions/checkout@v4 | |
- uses: actions/setup-java@v3 | |
with: | |
distribution: microsoft | |
java-version: 17 | |
- uses: extractions/setup-just@v1 | |
env: | |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
- name: download backend data | |
run: just download-data | |
- name: install python | |
uses: actions/setup-python@v4 | |
id: install_python | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: install poetry | |
run: python -m pip install --upgrade pip 'poetry==1.6.1' | |
- name: remove conflicting deps | |
if: matrix.pandas.conflicts != null | |
run: poetry remove ${{ join(matrix.pandas.conflicts, ' ') }} | |
- name: install minimum versions | |
run: poetry add --lock 'pandas@${{ matrix.pandas.version }}' 'numpy@1.23.*' | |
- name: checkout the lock file | |
run: git checkout poetry.lock | |
- name: lock with no updates | |
# poetry add is aggressive and will update other dependencies like | |
# numpy and pandas so we keep the pyproject.toml edits and then relock | |
# without updating anything except the requested versions | |
run: poetry lock --no-update | |
- name: install ibis | |
run: poetry install --without dev --without docs --extras pyspark | |
- name: run tests | |
run: just ci-check -m pyspark | |
- name: check that no untracked files were produced | |
shell: bash | |
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . | |
- name: upload code coverage | |
if: success() | |
uses: codecov/codecov-action@v3 | |
with: | |
flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }},pandas-${{ matrix.pandas.version }} | |
gen_lockfile_sqlalchemy2: | |
name: Generate Poetry Lockfile for SQLAlchemy 2 | |
runs-on: ubuntu-latest | |
steps: | |
- name: checkout | |
uses: actions/checkout@v4 | |
- name: install python | |
uses: actions/setup-python@v4 | |
with: | |
python-version: "3.11" | |
- run: python -m pip install --upgrade pip 'poetry==1.6.1' | |
- name: remove deps that are not compatible with sqlalchemy 2 | |
run: poetry remove snowflake-sqlalchemy | |
- name: add sqlalchemy 2 | |
run: poetry add --lock --optional 'sqlalchemy>=2,<3' | |
- name: checkout the lock file | |
run: git checkout poetry.lock | |
- name: lock with no updates | |
# poetry add is aggressive and will update other dependencies like | |
# numpy and pandas so we keep the pyproject.toml edits and then relock | |
# without updating anything except the requested versions | |
run: poetry lock --no-update | |
- name: check the sqlalchemy version | |
run: poetry show sqlalchemy --no-ansi | grep version | cut -d ':' -f2- | sed 's/ //g' | grep -P '^2\.' | |
- name: upload deps file | |
uses: actions/upload-artifact@v3 | |
with: | |
name: deps | |
path: | | |
pyproject.toml | |
poetry.lock | |
test_backends_sqlalchemy2: | |
name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} | |
runs-on: ${{ matrix.os }} | |
needs: | |
- gen_lockfile_sqlalchemy2 | |
strategy: | |
fail-fast: false | |
matrix: | |
os: | |
- ubuntu-latest | |
python-version: | |
- "3.11" | |
backend: | |
- name: mssql | |
title: MS SQL Server | |
services: | |
- mssql | |
extras: | |
- mssql | |
- name: mysql | |
title: MySQL | |
services: | |
- mysql | |
extras: | |
- geospatial | |
- mysql | |
- name: postgres | |
title: PostgreSQL | |
services: | |
- postgres | |
extras: | |
- geospatial | |
- postgres | |
- name: sqlite | |
title: SQLite | |
extras: | |
- sqlite | |
- name: trino | |
title: Trino | |
services: | |
- trino | |
extras: | |
- trino | |
- postgres | |
- name: duckdb | |
title: DuckDB | |
extras: | |
- duckdb | |
- name: oracle | |
title: Oracle | |
serial: true | |
extras: | |
- oracle | |
services: | |
- oracle | |
steps: | |
- name: checkout | |
uses: actions/checkout@v4 | |
- name: install libgeos for shapely | |
if: ${{ matrix.backend.name == 'postgres' }} | |
run: sudo apt-get install -qq -y build-essential libgeos-dev | |
- name: install freetds-dev for mssql | |
if: ${{ matrix.backend.name == 'mssql' }} | |
run: sudo apt-get install -qq -y build-essential libkrb5-dev krb5-config freetds-dev | |
- uses: extractions/setup-just@v1 | |
env: | |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
- name: download backend data | |
run: just download-data | |
- name: start services | |
if: matrix.backend.services != null | |
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} | |
- name: install python | |
uses: actions/setup-python@v4 | |
id: install_python | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: download poetry lockfile | |
uses: actions/download-artifact@v3 | |
with: | |
name: deps | |
path: deps | |
- name: pull out lockfile | |
run: | | |
set -euo pipefail | |
mv -f deps/* . | |
rm -r deps | |
- uses: syphar/restore-virtualenv@v1 | |
with: | |
requirement_files: poetry.lock | |
custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }} | |
- uses: syphar/restore-pip-download-cache@v1 | |
with: | |
requirement_files: poetry.lock | |
custom_cache_key_element: ${{ steps.install_python.outputs.python-version }} | |
- name: install poetry | |
run: python -m pip install --upgrade pip 'poetry==1.6.1' | |
- name: install ibis | |
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" | |
- name: run tests | |
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup | |
- name: check that no untracked files were produced | |
shell: bash | |
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . | |
- name: upload code coverage | |
if: success() | |
uses: codecov/codecov-action@v3 | |
with: | |
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} | |
backends: | |
# this job exists so that we can use a single job from this workflow to gate merging | |
runs-on: ubuntu-latest | |
needs: | |
- test_backends_min_version | |
- test_backends | |
- test_backends_sqlalchemy2 | |
- test_pyspark | |
steps: | |
- run: exit 0 |