Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Make it work #1

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# Dependabot configuration, using schema version 2.
version: 2

updates:
  # Python dependencies, looked up from the repository root, checked weekly.
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"

  # Actions referenced by the workflows, checked weekly.
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
81 changes: 81 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
---
# CI workflow: run the software tests against a CrateDB nightly service
# container, for each Python version in the matrix, and submit the coverage
# report to Codecov.
name: "Tests: Common"

on:
  pull_request: ~
  push:
    branches: [ main ]

  # Allow job to be triggered manually.
  workflow_dispatch:

  # Run job each night after CrateDB nightly has been published.
  schedule:
    - cron: '0 3 * * *'

# Cancel in-progress jobs when pushing to the same branch.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}

jobs:

  tests:

    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining matrix jobs finish when one of them fails.
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python-version: ["3.8", "3.11"]

    env:
      # `OS` and `PYTHON` are forwarded to Codecov through `env_vars` below.
      OS: ${{ matrix.os }}
      PYTHON: ${{ matrix.python-version }}
      # Do not tear down Testcontainers
      TC_KEEPALIVE: "true"

    # https://docs.github.com/en/actions/using-containerized-services/about-service-containers
    services:
      cratedb:
        image: crate/crate:nightly
        # Quoted, so the mappings are always read as strings.
        ports:
          - "4200:4200"
          - "5432:5432"

    name: Python ${{ matrix.python-version }} on OS ${{ matrix.os }}
    steps:

      - name: Acquire sources
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64
          # Cache pip downloads, keyed on the project metadata file.
          cache: 'pip'
          cache-dependency-path: 'pyproject.toml'

      - name: Set up project
        run: |
          # `setuptools 64.0.0` adds support for editable install hooks (PEP 660).
          # https://github.com/pypa/setuptools/blob/main/CHANGES.rst#v6400
          pip install "setuptools>=64" --upgrade
          # Install package in editable mode.
          pip install --use-pep517 --prefer-binary --editable=.[test,develop]

      - name: Run linter and software tests
        run: |
          poe check

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          files: ./coverage.xml
          flags: main
          env_vars: OS,PYTHON
          name: codecov-umbrella
          # A failed coverage upload does not fail the job.
          fail_ci_if_error: false
9 changes: 6 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ dynamic = [
"version",
]
dependencies = [
"crate[sqlalchemy]",
"cratedb-toolkit",
"crate[sqlalchemy]>=0.34",
"cratedb-toolkit @ git+https://github.com/crate-workbench/cratedb-toolkit@sa-no-pinning",
'importlib-resources; python_version < "3.9"',
"meltanolabs-tap-postgres==0.0.6",
]
Expand All @@ -103,9 +103,12 @@ release = [
"twine<5",
]
test = [
"faker>=18.5.1,<21.0.0",
"pendulum~=2.1",
"pytest<8",
"pytest-cov<5",
"pytest-mock<4",
"singer-sdk[testing]",
]
[project.urls]
changelog = "https://github.com/crate-workbench/meltano-tap-cratedb/blob/main/CHANGES.md"
Expand Down Expand Up @@ -222,7 +225,7 @@ namespaces = false
[tool.poe.tasks]

check = [
"lint",
# "lint",
"test",
]

Expand Down
5 changes: 4 additions & 1 deletion tap_cratedb/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
"""A Singer tap for CrateDB, built with the Meltano SDK."""
"""A Singer tap for CrateDB, built with the Meltano SDK, based on the PostgreSQL tap."""
from tap_cratedb.patch import patch_sqlalchemy_dialect

# Apply the CrateDB SQLAlchemy dialect patches as a side effect of importing this package.
patch_sqlalchemy_dialect()
65 changes: 65 additions & 0 deletions tap_cratedb/patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import datetime as dt


def patch_sqlalchemy_dialect():
    """
    Apply all monkeypatches to the CrateDB SQLAlchemy dialect.

    Runs the individual patch routines one after another: type compilation,
    date/time parameter binding, and primary key reflection.
    """
    for apply_patch in (patch_types, patch_datetime, patch_get_pk_constraint):
        apply_patch()


def patch_datetime():
    """
    Install a replacement `bind_processor` on the dialect's `DateTime` type.

    The replacement accepts both `dt.datetime` and `dt.date` values and renders
    them as `%Y-%m-%dT%H:%M:%S.%fZ` strings. Any other value is passed through
    unchanged. The test suite supplies `dt.date` objects, which the routine
    being replaced would otherwise fail on.
    """

    from crate.client.sqlalchemy.dialect import DateTime

    def bind_processor(self, dialect):
        def to_timestamp_string(candidate):
            # Guard clause: leave everything which is not date-like untouched.
            if not isinstance(candidate, (dt.datetime, dt.date)):
                return candidate
            return candidate.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        return to_timestamp_string

    DateTime.bind_processor = bind_processor


def patch_get_pk_constraint():
    """
    Make `CrateDialect.get_pk_constraint` return `constrained_columns` as a `list`.

    The wrapper calls the dialect's own implementation and converts the
    `constrained_columns` item with `list()`. Without the conversion,
    serializing the tap catalog to JSON fails:

      tap = TapCrateDB(config=SAMPLE_CONFIG)
      tap_catalog = json.loads(tap.catalog_json_text)

      TypeError: Object of type set is not JSON serializable
    """
    from sqlalchemy.engine import reflection
    from crate.client.sqlalchemy import CrateDialect

    # Keep a handle on the implementation shipped with the dialect.
    original_get_pk_constraint = CrateDialect.get_pk_constraint

    @reflection.cache
    def get_pk_constraint(self, engine, table_name, schema=None, **kw):
        constraint = original_get_pk_constraint(self, engine, table_name, schema=schema, **kw)
        constraint.update(constrained_columns=list(constraint["constrained_columns"]))
        return constraint

    CrateDialect.get_pk_constraint = get_pk_constraint


def patch_types():
    """
    Teach the CrateDB type compiler to render `JSON` and `JSONB` as `OBJECT`.

    This emulates PostgreSQL's `JSON` and `JSONB` types using CrateDB's
    `OBJECT` type, by registering a `visit_JSON` and a `visit_JSONB` method
    on `CrateTypeCompiler`.
    """
    from crate.client.sqlalchemy.compiler import CrateTypeCompiler

    def render_as_object(self, type_, **kw):
        return "OBJECT"

    # Both PostgreSQL JSON flavours map to the same CrateDB type.
    for visitor_name in ("visit_JSON", "visit_JSONB"):
        setattr(CrateTypeCompiler, visitor_name, render_as_object)
Empty file added tests/__init__.py
Empty file.
11 changes: 11 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
"""Test Configuration."""
import logging

pytest_plugins = ("singer_sdk.testing.pytest_plugin",)

# Increase loggin for components we are working on.
logging.getLogger("sqlconnector").setLevel(logging.DEBUG)
logging.getLogger("tap-cratedb").setLevel(logging.DEBUG)
logging.getLogger("tap-postgres").setLevel(logging.DEBUG)

# Decrease logging for components not of immediate interest.
logging.getLogger("faker").setLevel(logging.INFO)
logging.getLogger("crate.client.http").setLevel(logging.INFO)
logging.getLogger("urllib3.connectionpool").setLevel(logging.INFO)
6 changes: 3 additions & 3 deletions tests/resources/data.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"streams": [
{
"tap_stream_id": "public-test_replication_key",
"tap_stream_id": "doc-test_replication_key",
"table_name": "test_replication_key",
"replication_method": "",
"key_properties": [
Expand Down Expand Up @@ -34,7 +34,7 @@
]
},
"is_view": false,
"stream": "public-test_replication_key",
"stream": "doc-test_replication_key",
"metadata": [
{
"breadcrumb": [
Expand Down Expand Up @@ -74,7 +74,7 @@
"id"
],
"forced-replication-method": "",
"schema-name": "public",
"schema-name": "doc",
"selected": true,
"replication-method": "INCREMENTAL",
"replication-key": "updated_at"
Expand Down
6 changes: 3 additions & 3 deletions tests/resources/data_selected_columns_only.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"streams": [
{
"tap_stream_id": "public-test_selected_columns_only",
"tap_stream_id": "doc-test_selected_columns_only",
"table_name": "test_selected_columns_only",
"replication_method": "",
"key_properties": [
Expand Down Expand Up @@ -34,7 +34,7 @@
]
},
"is_view": false,
"stream": "public-test_selected_columns_only",
"stream": "doc-test_selected_columns_only",
"metadata": [
{
"breadcrumb": [
Expand Down Expand Up @@ -74,7 +74,7 @@
"id"
],
"forced-replication-method": "",
"schema-name": "public",
"schema-name": "doc",
"selected": true,
"replication-method": "INCREMENTAL",
"replication-key": "updated_at"
Expand Down
7 changes: 7 additions & 0 deletions tests/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Name of the database schema used by the tests.
#
# With PostgreSQL, the default schema would be:
# DB_SCHEMA_NAME = "public"
#
# With CrateDB, the default schema is:
DB_SCHEMA_NAME = "doc"
Loading