Skip to content

Commit

Permalink
refactor: filesystem sftp tests
Browse files Browse the repository at this point in the history
  • Loading branch information
donotpush committed Sep 12, 2024
1 parent 08f07d3 commit 87ab87a
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 103 deletions.
88 changes: 0 additions & 88 deletions .github/workflows/test_destination_sftp.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/test_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,6 @@ jobs:
if: ${{ ! (contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule')}}
- run: |
poetry run pytest tests/load --ignore tests/load/sources -m "not sftp"
poetry run pytest tests/load --ignore tests/load/sources
name: Run all tests Linux
if: ${{ contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule'}}
27 changes: 23 additions & 4 deletions .github/workflows/test_local_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ env:
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}
ACTIVE_DESTINATIONS: "[\"duckdb\", \"postgres\", \"filesystem\", \"weaviate\", \"qdrant\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\", \"sftp\"]"

DESTINATION__WEAVIATE__VECTORIZER: text2vec-contextionary
DESTINATION__WEAVIATE__MODULE_CONFIG: "{\"text2vec-contextionary\": {\"vectorizeClassName\": false, \"vectorizePropertyName\": true}}"
Expand Down Expand Up @@ -96,15 +96,34 @@ jobs:

- name: Install dependencies
run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant --with sentry-sdk --with pipeline -E deltalake


- name: Start SFTP server
run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d

- name: Configure SSH Agent for sftp tests
run: |
mkdir -p /home/runner/.ssh
cp tests/load/filesystem_sftp/bootstrap/bobby_rsa /home/runner/.ssh/id_rsa
cp tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub /home/runner/.ssh/id_rsa.pub
# always run full suite, also on branches
- run: poetry run pytest tests/load --ignore tests/load/sources -m "not sftp" && poetry run pytest tests/cli
name: Run tests Linux
- name: Run tests Linux
run: |
eval "$(ssh-agent -s)"
poetry run pytest tests/load --ignore tests/load/sources
poetry run pytest tests/cli
env:
DESTINATION__POSTGRES__CREDENTIALS: postgresql://loader:loader@localhost:5432/dlt_data
DESTINATION__QDRANT__CREDENTIALS__location: http://localhost:6333
DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PORT: 2222
DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME: foo
DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD: pass

- name: Stop weaviate
if: always()
run: docker compose -f ".github/weaviate-compose.yml" down -v

- name: Stop SFTP server
if: always()
run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" down -v

2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ markers =
essential: marks all essential tests
no_load: marks tests that do not load anything
needspyarrow17: marks tests that need pyarrow>=17.0.0 (deselected by default)
sftp: marks all sftp tests

1 change: 1 addition & 0 deletions tests/.dlt/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ bucket_url_abfss="abfss://[email protected]"
bucket_url_r2="s3://dlt-ci-test-bucket"
# use "/" as root path
bucket_url_gdrive="gdrive://15eC3e5MNew2XAIefWNlG8VlEa0ISnnaG"
bucket_url_sftp="sftp://localhost/data"
memory="memory:///m"
5 changes: 5 additions & 0 deletions tests/load/filesystem_sftp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import pytest
from tests.utils import skip_if_not_active
from tests.load.utils import ALL_FILESYSTEM_DRIVERS

# Package-level gate evaluated at import time.
# NOTE(review): presumably skips this package when "filesystem" is not among
# the active destinations — confirm against tests.utils.skip_if_not_active.
skip_if_not_active("filesystem")

# The SFTP tests need an SFTP server to connect to, so only run them when the
# "sftp" driver is listed in ALL_FILESYSTEM_DRIVERS. `allow_module_level=True`
# is required for calling pytest.skip() outside of a test function.
if "sftp" not in ALL_FILESYSTEM_DRIVERS:
    pytest.skip("sftp filesystem driver not configured", allow_module_level=True)
11 changes: 2 additions & 9 deletions tests/load/filesystem_sftp/test_filesystem_sftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def sftp_filesystem():
yield fs


@pytest.mark.sftp
def test_filesystem_sftp_server(sftp_filesystem):
test_file = "/data/countries.json"
input_data = {
Expand Down Expand Up @@ -92,7 +91,6 @@ def test_filesystem_sftp_server(sftp_filesystem):
fs.rm(test_file)


@pytest.mark.sftp
def test_filesystem_sftp_write(sftp_filesystem):
import posixpath
import pyarrow.parquet as pq
Expand Down Expand Up @@ -123,7 +121,6 @@ def states():
assert sorted(result_states) == sorted(expected_states)


@pytest.mark.sftp
@pytest.mark.parametrize("load_content", (True, False))
@pytest.mark.parametrize("glob_filter", ("**", "**/*.csv", "*.txt", "met_csv/A803/*.csv"))
def test_filesystem_sftp_read(load_content: bool, glob_filter: str) -> None:
Expand All @@ -146,7 +143,6 @@ def test_filesystem_sftp_read(load_content: bool, glob_filter: str) -> None:
assert_sample_files(all_file_items, fs, config, load_content, glob_filter)


@pytest.mark.sftp
def test_filesystem_sftp_auth_useranme_password():
os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples"
os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222"
Expand All @@ -160,7 +156,6 @@ def test_filesystem_sftp_auth_useranme_password():
assert len(files) > 0


@pytest.mark.sftp
def test_filesystem_sftp_auth_private_key():
os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples"
os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222"
Expand All @@ -175,7 +170,6 @@ def test_filesystem_sftp_auth_private_key():
assert len(files) > 0


@pytest.mark.sftp
def test_filesystem_sftp_auth_private_key_protected():
os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples"
os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222"
Expand All @@ -191,11 +185,11 @@ def test_filesystem_sftp_auth_private_key_protected():
assert len(files) > 0


# Test requires - ssh_agent with user's bobby key loaded. The commands required are:
# Test requires a running ssh-agent with user bobby's private key loaded. The commands and file names required are:
# eval "$(ssh-agent -s)"
# cp /path/to/tests/load/filesystem_sftp/bootstrap/bobby_rsa ~/.ssh/id_rsa
# cp /path/to/tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub ~/.ssh/id_rsa.pub
@pytest.mark.sftp
# ssh-add ~/.ssh/id_rsa
@pytest.mark.skipif(
not is_ssh_agent_ready(),
reason="SSH agent is not running or bobby's private key isn't stored in ~/.ssh/id_rsa",
Expand All @@ -214,7 +208,6 @@ def test_filesystem_sftp_auth_private_ssh_agent():
assert len(files) > 0


@pytest.mark.sftp
def test_filesystem_sftp_auth_ca_signed_pub_key():
os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples"
os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222"
Expand Down
3 changes: 3 additions & 0 deletions tests/load/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
GDRIVE_BUCKET = dlt.config.get("tests.bucket_url_gdrive", str)
FILE_BUCKET = dlt.config.get("tests.bucket_url_file", str)
R2_BUCKET = dlt.config.get("tests.bucket_url_r2", str)
SFTP_BUCKET = dlt.config.get("tests.bucket_url_sftp", str)
MEMORY_BUCKET = dlt.config.get("tests.memory", str)

ALL_FILESYSTEM_DRIVERS = dlt.config.get("ALL_FILESYSTEM_DRIVERS", list) or [
Expand All @@ -86,6 +87,7 @@
"file",
"memory",
"r2",
"sftp",
]

# Filter out buckets not in all filesystem drivers
Expand All @@ -97,6 +99,7 @@
ABFS_BUCKET,
AZ_BUCKET,
GDRIVE_BUCKET,
SFTP_BUCKET,
]
WITH_GDRIVE_BUCKETS = [
bucket
Expand Down

0 comments on commit 87ab87a

Please sign in to comment.