# Workflow file for the GitHub Actions run of
# "feat(polars): add Intersection and Difference ops #25952".
# (Captured from the run's web page; the "Skip to content" navigation link and
# the repeated page title are collapsed into this comment header.)

name: Backends

# Runs on pushes and pull requests that target `main` or a "*.x.x" branch, and
# on merge-queue (`merge_group`) events.
on:
  push:
    # Skip the backend suite if all changes are docs
    paths-ignore:
      - "docs/**"
      - "**/*.md"
      - "**/*.qmd"
      - "codecov.yml"
      - ".envrc"
      - ".codespellrc"
    branches:
      - main
      - "*.x.x"
  pull_request:
    # Skip the backend suite if all changes are docs
    paths-ignore:
      - "docs/**"
      - "**/*.md"
      - "**/*.qmd"
      - "codecov.yml"
      - ".envrc"
      - ".codespellrc"
    branches:
      - main
      - "*.x.x"
  merge_group:

permissions:
  # this allows extractions/setup-just to list releases for `just` at a higher
  # rate limit while restricting GITHUB_TOKEN permissions elsewhere
  contents: read

# One concurrency group per repository + PR head ref (falling back to the
# commit SHA when no head ref is set) + workflow; a newer run in the same group
# cancels the one still in progress.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

# Environment shared by every job in this workflow.
env:
  FORCE_COLOR: "1"
  ODBCSYSINI: "${{ github.workspace }}/ci/odbc"
  HYPOTHESIS_PROFILE: "ci"
jobs:
  # BigQuery unit tests only (ibis/backends/bigquery/tests/unit); no services
  # are started for this job.
  test_bigquery_lite:
    name: BigQuery ${{ matrix.os }} python-${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
        python-version:
          - "3.10"
          - "3.13"
    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: install python
        uses: actions/setup-python@v5
        id: install_python
        with:
          python-version: ${{ matrix.python-version }}

      # NOTE(review): the exact version pin of this action was lost in the
      # captured copy of this file (mangled into an "[email protected]"
      # placeholder); the `v5` major tag is a stand-in — restore the pinned
      # version from version control.
      - name: install uv
        uses: astral-sh/setup-uv@v5

      - uses: extractions/setup-just@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: run simple bigquery unit tests
        run: just ci-check "--extra bigquery" ibis/backends/bigquery/tests/unit

      - name: upload code coverage
        if: success()
        continue-on-error: true
        uses: codecov/codecov-action@v5
        with:
          flags: backend,bigquery,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
          token: ${{ secrets.CODECOV_TOKEN }}

  # Runs each backend's tests over the os x python-version x backend matrix.
  # Per-backend keys consumed by the steps below:
  #   name            - pytest marker (`-m`) and the `uv add --optional` target
  #   title           - only used in the job's display name
  #   serial          - when true, tests run without `--numprocesses`
  #   extras          - joined into the first argument of `just ci-check`
  #   additional_deps - extra packages added with `uv add` before testing
  #   services        - passed to `just up`
  #   sys-deps        - apt packages installed on ubuntu-latest only
  test_backends:
    name: ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
        python-version:
          - "3.10"
          - "3.13"
        backend:
          - name: duckdb
            title: DuckDB
            serial: true
            extras:
              - --extra duckdb
              - --extra deltalake
              - --extra geospatial
              - --extra examples
              - --extra decompiler
              - --extra polars
            additional_deps:
              - torch
          - name: clickhouse
            title: ClickHouse
            services:
              - clickhouse
            extras:
              - --extra clickhouse
              - --extra examples
          - name: sqlite
            title: SQLite
            extras:
              - --extra sqlite
          - name: datafusion
            title: DataFusion
            serial: true
            extras:
              - --extra datafusion
          - name: polars
            title: Polars
            extras:
              - --extra polars
              - --extra deltalake
          - name: mysql
            title: MySQL
            services:
              - mysql
            extras:
              - --extra mysql
              - --extra geospatial
              - --extra polars
            sys-deps:
              - libgeos-dev
              - default-libmysqlclient-dev
          - name: postgres
            title: PostgreSQL
            extras:
              - --extra postgres
              - --extra geospatial
            services:
              - postgres
            sys-deps:
              - libgeos-dev
          - name: postgres
            title: PostgreSQL + Torch
            extras:
              - --extra postgres
              - --extra geospatial
              - --extra polars
            additional_deps:
              - torch
            services:
              - postgres
            sys-deps:
              - libgeos-dev
          - name: risingwave
            title: RisingWave
            serial: true
            services:
              - risingwave
            extras:
              - --extra risingwave
          - name: impala
            title: Impala
            serial: true
            extras:
              - --extra impala
            services:
              - impala
              - kudu
            sys-deps:
              - cmake
              - ninja-build
          - name: mssql
            title: MS SQL Server
            extras:
              - --extra mssql
              - --extra polars
            services:
              - mssql
            sys-deps:
              - freetds-dev
              - unixodbc-dev
              - tdsodbc
          - name: trino
            title: Trino
            extras:
              - --extra trino
            services:
              - trino
          - name: druid
            title: Druid
            extras:
              - --extra druid
            services:
              - druid
          - name: exasol
            title: Exasol
            serial: true
            extras:
              - --extra exasol
            services:
              - exasol
          - name: oracle
            title: Oracle
            serial: true
            extras:
              - --extra oracle
              - --extra polars
            services:
              - oracle
          - name: flink
            title: Flink
            serial: true
            extras:
              - --extra flink
            additional_deps:
              - "'apache-flink==1.20.0'"
              - "'pandas<2.2'"
              - setuptools
            services:
              - flink
        # Extra combinations added on top of the cross product above.
        include:
          # flink and impala are excluded for python 3.13 below, so they get an
          # additional python 3.11 run on ubuntu
          - os: ubuntu-latest
            python-version: "3.11"
            backend:
              name: flink
              title: Flink
              serial: true
              extras:
                - --extra flink
              additional_deps:
                - "'apache-flink==1.20.0'"
                - "'pandas<2.2'"
                - setuptools
              services:
                - flink
          - os: ubuntu-latest
            python-version: "3.11"
            backend:
              name: impala
              title: Impala
              serial: true
              extras:
                - --extra impala
              services:
                - impala
                - kudu
              sys-deps:
                - cmake
                - ninja-build
          # pytorch wheel doesn't exist for windows + python 3.13, so test
          # against 3.12 until that's shipped
          - os: windows-latest
            python-version: "3.12"
            backend:
              name: duckdb
              title: DuckDB
              serial: true
              extras:
                - --extra duckdb
                - --extra deltalake
                - --extra geospatial
                - --extra examples
                - --extra decompiler
                - --extra polars
              additional_deps:
                - torch
          # also test duckdb with python 3.13 on windows, *without* pytorch
          - os: windows-latest
            python-version: "3.13"
            backend:
              name: duckdb
              title: DuckDB
              serial: true
              extras:
                - --extra duckdb
                - --extra deltalake
                - --extra geospatial
                - --extra examples
                - --extra decompiler
                - --extra polars
        # Combinations removed from the cross product. Every backend that
        # declares `services` is excluded on windows-latest.
        exclude:
          # the torch-enabled duckdb entry; replaced by the two windows
          # `include` entries above
          - os: windows-latest
            python-version: "3.13"
            backend:
              name: duckdb
          - os: windows-latest
            backend:
              name: mysql
              title: MySQL
              extras:
                - --extra mysql
                - --extra geospatial
                - --extra polars
              services:
                - mysql
              sys-deps:
                - libgeos-dev
                - default-libmysqlclient-dev
          - os: windows-latest
            backend:
              name: clickhouse
              title: ClickHouse
              extras:
                - --extra clickhouse
                - --extra examples
              services:
                - clickhouse
          - os: windows-latest
            backend:
              name: postgres
              title: PostgreSQL
              extras:
                - --extra postgres
                - --extra geospatial
              services:
                - postgres
              sys-deps:
                - libgeos-dev
          - os: windows-latest
            backend:
              name: risingwave
              title: RisingWave
              serial: true
              services:
                - risingwave
              extras:
                - --extra risingwave
          - os: windows-latest
            backend:
              name: postgres
              title: PostgreSQL + Torch
              extras:
                - --extra postgres
                - --extra geospatial
                - --extra polars
              additional_deps:
                - torch
              services:
                - postgres
              sys-deps:
                - libgeos-dev
          # TODO(deepyaman): Test whether this works upon releasing https://github.com/cloudera/impyla/commit/bf1f94c3c4106ded6267d2485c1e939775a6a87f
          - os: ubuntu-latest
            python-version: "3.13"
            backend:
              name: impala
              title: Impala
              serial: true
              extras:
                - --extra impala
              services:
                - impala
                - kudu
              sys-deps:
                - cmake
                - ninja-build
          - os: windows-latest
            backend:
              name: impala
              title: Impala
              serial: true
              extras:
                - --extra impala
              services:
                - impala
                - kudu
              sys-deps:
                - cmake
                - ninja-build
          - os: windows-latest
            backend:
              name: mssql
              title: MS SQL Server
              extras:
                - --extra mssql
                - --extra polars
              services:
                - mssql
              sys-deps:
                - freetds-dev
                - unixodbc-dev
                - tdsodbc
          - os: windows-latest
            backend:
              name: trino
              title: Trino
              services:
                - trino
              extras:
                - --extra trino
          - os: windows-latest
            backend:
              name: druid
              title: Druid
              extras:
                - --extra druid
              services:
                - druid
          - os: windows-latest
            backend:
              name: oracle
              title: Oracle
              serial: true
              extras:
                - --extra oracle
                - --extra polars
              services:
                - oracle
          - os: ubuntu-latest
            python-version: "3.13"
            backend:
              name: flink
              title: Flink
              serial: true
              extras:
                - --extra flink
              additional_deps:
                - "'apache-flink==1.20.0'"
                - "'pandas<2.2'"
                - setuptools
              services:
                - flink
          - os: windows-latest
            backend:
              name: flink
              title: Flink
              serial: true
              extras:
                - --extra flink
              additional_deps:
                - "'apache-flink==1.20.0'"
                - "'pandas<2.2'"
                - setuptools
              services:
                - flink
          - os: windows-latest
            backend:
              name: exasol
              title: Exasol
              serial: true
              extras:
                - --extra exasol
              services:
                - exasol
    steps:
      - name: update and install system dependencies
        if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null
        run: |
          set -euo pipefail
          sudo apt-get update -qq -y
          sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }}

      - name: install sqlite
        if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite'
        run: choco install sqlite

      - name: checkout
        uses: actions/checkout@v4

      - uses: extractions/setup-just@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: download backend data
        run: just download-data

      - name: start services
        if: matrix.backend.services != null
        run: just up ${{ join(matrix.backend.services, ' ') }}

      - name: install python
        uses: actions/setup-python@v5
        id: install_python
        with:
          python-version: ${{ matrix.python-version }}

      # NOTE(review): the exact version pin of this action was lost in the
      # captured copy of this file (mangled into an "[email protected]"
      # placeholder); the `v5` major tag is a stand-in — restore the pinned
      # version from version control.
      - name: install uv
        uses: astral-sh/setup-uv@v5

      - name: install other deps
        if: matrix.backend.additional_deps != null
        run: uv add --no-sync --optional ${{ matrix.backend.name }} ${{ join(matrix.backend.additional_deps, ' ') }}

      - name: show installed deps
        run: uv tree

      # Exactly one of the two test steps below runs, selected by
      # `matrix.backend.serial`.
      - name: "run parallel tests: ${{ matrix.backend.name }}"
        if: ${{ !matrix.backend.serial }}
        run: just ci-check "${{ join(matrix.backend.extras, ' ') }} --extra examples" -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup
        env:
          # NOTE(review): every impala matrix entry sets `serial: true`, so
          # these IBIS_TEST_IMPALA_* variables never reach an impala run from
          # this step — confirm whether they belong on the serial step.
          IBIS_TEST_IMPALA_HOST: localhost
          IBIS_TEST_IMPALA_PORT: "21050"
          IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}

      - name: "run serial tests: ${{ matrix.backend.name }}"
        if: matrix.backend.serial
        run: just ci-check "${{ join(matrix.backend.extras, ' ') }} --extra examples" -m ${{ matrix.backend.name }}
        env:
          FLINK_REMOTE_CLUSTER_ADDR: localhost
          FLINK_REMOTE_CLUSTER_PORT: "8081"
          IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}

      - name: "run backend doctests: ${{ matrix.backend.name }}"
        if: matrix.os == 'ubuntu-latest'
        run: just backend-doctests ${{ matrix.backend.name }}
        env:
          FLINK_REMOTE_CLUSTER_ADDR: localhost
          FLINK_REMOTE_CLUSTER_PORT: "8081"
          IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}

      # Revert the files that the `uv add` step above may have modified so
      # they do not trip the cleanliness check that follows.
      - name: checkout uv.lock and pyproject.toml
        run: git checkout uv.lock pyproject.toml

      # Fails when `git status --porcelain` prints anything at all.
      - name: check that no untracked files were produced
        shell: bash
        run: |
          ! git status --porcelain | grep -F .

      - name: upload code coverage
        if: success()
        continue-on-error: true
        uses: codecov/codecov-action@v5
        with:
          flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
          token: ${{ secrets.CODECOV_TOKEN }}

      - name: Show docker compose logs on fail
        if: matrix.backend.services != null && failure()
        run: docker compose logs

  # PySpark tests on ubuntu-latest. Entries tagged `local` run the tests
  # without starting a service; the entry tagged `remote` starts the
  # `spark-connect` service and exports SPARK_REMOTE for the test run.
  test_pyspark:
    name: PySpark ${{ matrix.tag }} ${{ matrix.pyspark-minor-version }} ubuntu-latest python-${{ matrix.python-version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: "3.10"
            pyspark-minor-version: "3.3"
            tag: local
            deps:
              - pyspark==3.3.4
              - pandas==1.5.3
              - numpy==1.23.5
          - python-version: "3.11"
            pyspark-minor-version: "3.5"
            tag: local
            deps:
              - delta-spark==3.2.1
          - python-version: "3.13"
            pyspark-minor-version: "3.5"
            tag: local
            deps:
              - setuptools==75.1.0
              - delta-spark==3.2.1
          - python-version: "3.12"
            pyspark-minor-version: "3.5"
            SPARK_REMOTE: "sc://localhost:15002"
            tag: remote
            deps:
              - setuptools==75.1.0
              - delta-spark==3.2.1
              - googleapis-common-protos
              - grpcio
              - grpcio-status
    steps:
      - name: checkout
        uses: actions/checkout@v4

      - uses: actions/setup-java@v4
        with:
          distribution: microsoft
          java-version: "17"

      - uses: extractions/setup-just@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: start services
        if: matrix.tag == 'remote'
        run: just up spark-connect

      - name: download backend data
        run: just download-data

      - name: install python
        uses: actions/setup-python@v5
        id: install_python
        with:
          python-version: ${{ matrix.python-version }}

      # NOTE(review): the exact version pin of this action was lost in the
      # captured copy of this file (mangled into an "[email protected]"
      # placeholder); the `v5` major tag is a stand-in — restore the pinned
      # version from version control.
      - name: install uv
        uses: astral-sh/setup-uv@v5

      # lonboard requires a version of pandas that pyspark is not compatible with
      - name: remove lonboard
        if: matrix.pyspark-minor-version == '3.3'
        run: uv remove --group docs --no-sync lonboard

      - name: install pyspark-specific dependencies
        run: uv add --no-sync ${{ join(matrix.deps, ' ') }}

      - name: install iceberg
        shell: bash
        run: just download-iceberg-jar ${{ matrix.pyspark-minor-version }}

      - name: run spark connect tests
        if: matrix.tag == 'remote'
        run: just ci-check "--extra pyspark --extra examples" -m pyspark
        env:
          SPARK_REMOTE: ${{ matrix.SPARK_REMOTE }}

      - name: run spark tests
        if: matrix.tag == 'local'
        run: just ci-check "--extra pyspark --extra examples" -m pyspark

      # Reverts the files touched by `uv add`/`uv remove`, then fails when
      # `git status --porcelain` prints anything at all.
      - name: check that no untracked files were produced
        shell: bash
        run: git checkout uv.lock pyproject.toml && ! git status --porcelain | grep -F .

      - name: upload code coverage
        # only upload coverage for jobs that aren't mostly xfails
        if: success()
        continue-on-error: true
        uses: codecov/codecov-action@v5
        with:
          flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
          token: ${{ secrets.CODECOV_TOKEN }}

  backends:
    # this job exists so that we can use a single job from this workflow to gate merging
    runs-on: ubuntu-latest
    # Run even when a dependency failed or was cancelled. Without this the job
    # is skipped in that case, and a skipped job does not block a required
    # status check, so the gate would let a failing run through.
    if: always()
    needs:
      - test_bigquery_lite
      - test_backends
      - test_pyspark
    steps:
      - name: fail if any backend job did not succeed
        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
        run: exit 1
      - run: exit 0