Commit cbcff92

drop python 3.8, enable python 3.13, and enable full linting for 3.12 (#2194)

* add python 3.12 linting

* update locked versions to make project installable on py 3.12

* update flake8

* downgrade poetry for all tests relying on python3.8

* drop python 3.8

* enable python3.13

* copy test updates from python3.13 branch

* update locked sentry version

* pin poetry to 1.8.5

* install ibis outside of poetry

* rename to workflows for consistency

* switch to published alpha version of dlt-pendulum for python 3.13

* fix images

* add note to readme
sh-rp authored Jan 12, 2025
1 parent c7c3370 commit cbcff92
Showing 47 changed files with 1,601 additions and 1,287 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
@@ -26,7 +26,7 @@ jobs:
matrix:
os:
- ubuntu-latest
python-version: ["3.9.x", "3.10.x", "3.11.x"]
python-version: ["3.9.x", "3.10.x", "3.11.x", "3.12.x"]

defaults:
run:
@@ -42,9 +42,9 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install Poetry
-uses: snok/install-poetry@v1
+uses: snok/install-poetry@v1.3.2
with:
virtualenvs-create: true
virtualenvs-in-project: true
22 changes: 6 additions & 16 deletions .github/workflows/test_common.yml
@@ -43,6 +43,10 @@ jobs:
os: "ubuntu-latest"
- python-version: "3.12.x"
os: "ubuntu-latest"
- python-version: "3.13.x"
os: "ubuntu-latest"
- python-version: "3.13.x"
os: "windows-latest"

defaults:
run:
@@ -115,7 +119,7 @@ jobs:
shell: cmd
- name: Install pyarrow
-run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk && poetry run pip install pyarrow==15.0.2
+run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk

- run: |
poetry run pytest tests/pipeline/test_pipeline_extra.py -k arrow
@@ -128,7 +132,7 @@ jobs:
shell: cmd
- name: Install pipeline and sources dependencies
-run: poetry install --no-interaction -E duckdb -E cli -E parquet -E deltalake -E sql_database --with sentry-sdk,pipeline,sources && poetry run pip install pyarrow==15.0.2
+run: poetry install --no-interaction -E duckdb -E cli -E parquet -E deltalake -E sql_database --with sentry-sdk,pipeline,sources

- run: |
poetry run pytest tests/extract tests/pipeline tests/libs tests/cli/common tests/destinations tests/sources
@@ -154,20 +158,6 @@ jobs:
name: Run extract tests Windows
shell: cmd
-# here we upgrade pyarrow to 17 and run the libs tests again
-- name: Install pyarrow 17
-run: poetry run pip install pyarrow==17.0.0
-
-- run: |
-poetry run pytest tests/libs
-if: runner.os != 'Windows'
-name: Run libs tests Linux/MAC
-- run: |
-poetry run pytest tests/libs
-if: runner.os == 'Windows'
-name: Run libs tests Windows
-shell: cmd
# - name: Install Pydantic 1.0
# run: pip install "pydantic<2"

2 changes: 1 addition & 1 deletion .github/workflows/test_destination_athena.yml
@@ -68,7 +68,7 @@ jobs:

- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
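Note on the recurring change below: the `ibis` poetry group is replaced by a plain pip install of `ibis-framework` after `poetry install` (the commit message calls this "install ibis outside of poetry"). A minimal sketch of how one might verify such an environment, using the DuckDB extra shown in the workflow; this check is illustrative and not part of the commit:

```python
# hypothetical smoke test for the pip-installed ibis, not part of this commit
import ibis

con = ibis.duckdb.connect()  # in-memory DuckDB backend from the [duckdb] extra
t = ibis.memtable({"x": [1, 2, 3]})
print(ibis.__version__, con.execute(t.x.sum()))  # expect: <version> 6
```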
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_athena_iceberg.yml
@@ -68,7 +68,7 @@ jobs:

- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_bigquery.yml
@@ -67,7 +67,7 @@ jobs:

- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-run: poetry install --no-interaction -E bigquery --with providers -E parquet --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E bigquery --with providers -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_clickhouse.yml
@@ -62,7 +62,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

- name: Install dependencies
-run: poetry install --no-interaction -E clickhouse --with providers -E parquet --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E clickhouse --with providers -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_databricks.yml
@@ -65,7 +65,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

- name: Install dependencies
-run: poetry install --no-interaction -E databricks -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E databricks -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_dremio.yml
@@ -66,7 +66,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

- name: Install dependencies
-run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- run: |
poetry run pytest tests/load --ignore tests/load/sources
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_motherduck.yml
@@ -65,7 +65,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-motherduck

- name: Install dependencies
-run: poetry install --no-interaction -E motherduck -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E motherduck -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_mssql.yml
@@ -70,7 +70,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

- name: Install dependencies
-run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_snowflake.yml
@@ -65,7 +65,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

- name: Install dependencies
-run: poetry install --no-interaction -E snowflake -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E snowflake -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
@@ -87,7 +87,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations

- name: Install dependencies
-run: poetry install --no-interaction -E parquet -E filesystem -E sqlalchemy -E cli --with sentry-sdk --with pipeline,ibis && poetry run pip install mysqlclient && poetry run pip install "sqlalchemy==${{ matrix.sqlalchemy }}"
+run: poetry install --no-interaction -E parquet -E filesystem -E sqlalchemy -E cli --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse] && poetry run pip install mysqlclient && poetry run pip install "sqlalchemy==${{ matrix.sqlalchemy }}"

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_synapse.yml
@@ -68,7 +68,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

- name: Install dependencies
-run: poetry install --no-interaction -E synapse -E parquet --with sentry-sdk --with pipeline,ibis
+run: poetry install --no-interaction -E synapse -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
2 changes: 1 addition & 1 deletion .github/workflows/test_destinations.yml
@@ -78,7 +78,7 @@ jobs:
# key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-redshift

- name: Install dependencies
-run: poetry install --no-interaction -E redshift -E postgis -E postgres -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline,ibis -E deltalake -E pyiceberg
+run: poetry install --no-interaction -E redshift -E postgis -E postgres -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline -E deltalake -E pyiceberg && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: Upgrade sqlalchemy
run: poetry run pip install sqlalchemy==2.0.18 # minimum version required by `pyiceberg`
@@ -96,7 +96,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations

- name: Install dependencies
-run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline,ibis -E deltalake -E pyiceberg
+run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline -E deltalake -E pyiceberg && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

- name: Upgrade sqlalchemy
run: poetry run pip install sqlalchemy==2.0.18 # minimum version required by `pyiceberg`
2 changes: 1 addition & 1 deletion .github/workflows/test_doc_snippets.yml
@@ -73,7 +73,7 @@ jobs:
node-version: 20

- name: Install Poetry
-uses: snok/install-poetry@v1
+uses: snok/install-poetry@v1.3.2
with:
virtualenvs-create: true
virtualenvs-in-project: true
File renamed without changes.
3 changes: 2 additions & 1 deletion .gitignore
@@ -143,4 +143,5 @@ tmp
local_cache/

# test file for examples are generated and should not be committed
-docs/examples/**/test*.py
+docs/examples/**/test*.py
+compiled_requirements.txt
9 changes: 6 additions & 3 deletions Makefile
@@ -102,11 +102,14 @@ publish-library: build-library
poetry publish

test-build-images: build-library
-# TODO: enable when we can remove special duckdb setting for python 3.12
+# NOTE: poetry export does not work with our many different deps, we install a subset and freeze
+# poetry export -f requirements.txt --output _gen_requirements.txt --without-hashes --extras gcp --extras redshift
+# grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt
poetry install --no-interaction -E gcp -E redshift -E duckdb
+poetry run pip freeze > _gen_requirements.txt
# filter out libs that need native compilation
grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt
docker build -f deploy/dlt/Dockerfile.airflow --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .
-# docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .
+docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .

preprocess-docs:
# run docs preprocessing to run a few checks and ensure examples can be parsed
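For context on the Makefile recipe above: it freezes the installed environment and keeps only the packages named in `compiled_packages.txt`, a grep alternation of libraries that need native compilation in the image. A rough Python equivalent of the freeze-and-filter step, assuming both files sit in the working directory:

```python
# rough equivalent of: grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt
import re
from pathlib import Path

# compiled_packages.txt holds a BRE alternation like: pyarrow\|cffi\|idna\|...
pattern = re.compile(Path("compiled_packages.txt").read_text().strip().replace(r"\|", "|"))

frozen = Path("_gen_requirements.txt").read_text().splitlines()
kept = [line for line in frozen if pattern.search(line)]
Path("compiled_requirements.txt").write_text("\n".join(kept) + "\n")
```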
2 changes: 1 addition & 1 deletion README.md
@@ -28,7 +28,7 @@ Be it a Google Colab notebook, AWS Lambda function, an Airflow DAG, your local l

## Installation

-dlt supports Python 3.8+.
+dlt supports Python 3.9+. Python 3.13 is supported but considered experimental for now, as not all of dlt's extras support Python 3.13 yet. We additionally maintain a [forked version of pendulum](https://github.com/dlt-hub/pendulum) until an official pendulum release supports 3.13.

```sh
pip install dlt
```
2 changes: 1 addition & 1 deletion compiled_packages.txt
@@ -1 +1 @@
-cffi\|idna\|simplejson\|pendulum\|grpcio\|google-crc32c
+pyarrow\|cffi\|idna\|simplejson\|pendulum\|grpcio\|google-crc32c
30 changes: 21 additions & 9 deletions deploy/dlt/Dockerfile
@@ -1,4 +1,4 @@
-FROM alpine:3.15
+FROM python:3.11.11-alpine

# Metadata
LABEL org.label-schema.vendor="dltHub" \
@@ -17,16 +17,28 @@ ADD compiled_requirements.txt .

# install alpine deps
RUN apk update &&\
-apk add --no-cache python3 ca-certificates curl postgresql &&\
-apk add --no-cache --virtual build-deps build-base automake autoconf libtool python3-dev postgresql-dev libffi-dev linux-headers gcc musl-dev &&\
-ln -s /usr/bin/python3 /usr/bin/python &&\
+apk add --no-cache ca-certificates curl postgresql git &&\
+apk add --no-cache --virtual build-deps build-base automake autoconf libtool python3-dev postgresql-dev libffi-dev linux-headers gcc musl-dev cmake &&\
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py &&\
rm get-pip.py &&\
-pip3 install --upgrade setuptools wheel &&\
-rm -r /usr/lib/python*/ensurepip &&\
-pip3 install -r compiled_requirements.txt &&\
+pip install --upgrade setuptools wheel pip &&\
+rm -r /usr/lib/python*/ensurepip

+# install arrow 17.0.0, usually we would need apache-arrow-dev=17.0.0 but it is not available in alpine 3.20
+# adapt this version to the arrow version you need
+RUN git clone --no-checkout https://github.com/apache/arrow.git /arrow \
+&& cd /arrow \
+&& git checkout tags/apache-arrow-17.0.0 \
+&& cd cpp \
+&& mkdir build \
+&& cd build \
+&& cmake -DARROW_CSV=ON -DARROW_JSON=ON -DARROW_FILESYSTEM=ON .. \
+&& make -j$(nproc) \
+&& make install
+
+RUN pip install -r compiled_requirements.txt &&\
+apk del --purge build-deps
#rm -r /root/.cache


# add build labels and envs
ARG COMMIT_SHA=""
@@ -38,7 +38,7 @@ ENV IMAGE_VERSION=${IMAGE_VERSION}

# install exactly the same version of the library we used to build
COPY dist/dlt-${IMAGE_VERSION}.tar.gz .
-RUN pip3 install /tmp/pydlt/dlt-${IMAGE_VERSION}.tar.gz[gcp,redshift,duckdb]
+RUN pip install /tmp/pydlt/dlt-${IMAGE_VERSION}.tar.gz[gcp,redshift,duckdb]

WORKDIR /
RUN rm -r /tmp/pydlt
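Because the new base image relies on an Arrow 17.0.0 build from source, a quick import check inside the built image can confirm that pyarrow and the CSV component enabled via `-DARROW_CSV=ON` actually work. A hypothetical verification snippet, not something this Dockerfile runs:

```python
# hypothetical smoke test inside the built image: pyarrow against the source-built Arrow
import io
import pyarrow as pa
import pyarrow.csv as pa_csv  # exercises the ARROW_CSV component enabled above

table = pa_csv.read_csv(io.BytesIO(b"a,b\n1,2\n3,4\n"))
print(pa.__version__, table.num_rows)  # expect: 17.x.x 2
```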
2 changes: 1 addition & 1 deletion deploy/dlt/Dockerfile.airflow
@@ -1,4 +1,4 @@
-FROM apache/airflow:2.5.2-python3.8
+FROM apache/airflow:2.5.2-python3.9

# Metadata
LABEL org.label-schema.vendor="dltHub" \
10 changes: 6 additions & 4 deletions dlt/cli/deploy_command_helpers.py
@@ -2,6 +2,8 @@
import abc
import os
import yaml
+import ast
+
from yaml import Dumper
from itertools import chain
from typing import List, Optional, Sequence, Tuple, Any, Dict
@@ -22,7 +24,7 @@
from dlt.common.git import get_origin, get_repo, Repo
from dlt.common.configuration.specs.runtime_configuration import get_default_pipeline_name
from dlt.common.typing import StrAny
-from dlt.common.reflection.utils import evaluate_node_literal, ast_unparse
+from dlt.common.reflection.utils import evaluate_node_literal
from dlt.common.pipeline import LoadInfo, TPipelineState, get_dlt_repos_dir
from dlt.common.storages import FileStorage
from dlt.common.utils import set_working_dir
@@ -312,7 +314,7 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio
if f_r_value is None:
fmt.warning(
"The value of `dev_mode` in call to `dlt.pipeline` cannot be"
f" determined from {ast_unparse(f_r_node).strip()}. We assume that you know"
f" determined from {ast.unparse(f_r_node).strip()}. We assume that you know"
" what you are doing :)"
)
if f_r_value is True:
@@ -330,7 +332,7 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio
raise CliCommandInnerException(
"deploy",
"The value of 'pipelines_dir' argument in call to `dlt_pipeline` cannot be"
f" determined from {ast_unparse(p_d_node).strip()}. Pipeline working dir"
f" determined from {ast.unparse(p_d_node).strip()}. Pipeline working dir"
" will be found. Pass it directly with --pipelines-dir option.",
)

@@ -341,7 +343,7 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio
raise CliCommandInnerException(
"deploy",
"The value of 'pipeline_name' argument in call to `dlt_pipeline` cannot be"
f" determined from {ast_unparse(p_d_node).strip()}. Pipeline working dir"
f" determined from {ast.unparse(p_d_node).strip()}. Pipeline working dir"
" will be found. Pass it directly with --pipeline-name option.",
)
pipelines.append((pipeline_name, pipelines_dir))
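Background on the `ast_unparse` removal in this file: the helper existed as a shim while Python 3.8 was supported, since the stdlib `ast.unparse` only arrived in 3.9; with 3.9 as the new floor the stdlib call is used directly. A minimal sketch of the round-trip the deploy helper performs, with a made-up pipeline script:

```python
import ast

# parse a call the way the deploy command inspects pipeline scripts
call = ast.parse("dlt.pipeline(pipeline_name='demo', dev_mode=True)").body[0].value
for kw in call.keywords:
    # ast.unparse is stdlib since Python 3.9, so no shim is needed anymore
    print(kw.arg, "=", ast.unparse(kw.value).strip())
# pipeline_name = 'demo'
# dev_mode = True
```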
4 changes: 2 additions & 2 deletions dlt/cli/source_detection.py
@@ -4,7 +4,7 @@

from dlt.common.configuration import is_secret_hint
from dlt.common.configuration.specs import BaseConfiguration
-from dlt.common.reflection.utils import creates_func_def_name_node, ast_unparse
+from dlt.common.reflection.utils import creates_func_def_name_node
from dlt.common.typing import is_optional_type

from dlt.sources import SourceReference
@@ -63,7 +63,7 @@ def find_source_calls_to_replace(
for calls in visitor.known_sources_resources_calls.values():
for call in calls:
transformed_nodes.append(
-(call.func, ast.Name(id=pipeline_name + "_" + ast_unparse(call.func)))
+(call.func, ast.Name(id=pipeline_name + "_" + ast.unparse(call.func)))
)

return transformed_nodes
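The same substitution applies here; the surrounding code rewrites source calls by prefixing them with the pipeline name. A self-contained sketch of that rename, with invented names:

```python
import ast

call = ast.parse("my_source()").body[0].value  # an ast.Call node
renamed = ast.Name(id="demo_pipeline" + "_" + ast.unparse(call.func))
print(ast.unparse(renamed))  # demo_pipeline_my_source
```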
1 change: 0 additions & 1 deletion dlt/common/jsonpath.py
@@ -44,7 +44,6 @@ def resolve_paths(paths: TAnyJsonPath, data: DictStrAny) -> List[str]:
>>> # ['a.items.[0].b', 'a.items.[1].b']
"""
paths = compile_paths(paths)
-p: JSONPath
return list(chain.from_iterable((str(r.full_path) for r in p.find(data)) for p in paths))


