From 41fbf4706ea5b0ca96f0edd729be9522fd235a0f Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:29:01 +0200 Subject: [PATCH 01/41] chore: add paramiko dev dependency --- poetry.lock | 91 ++++++++++++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 230b354b97..031b18f5d1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "about-time" @@ -923,6 +923,46 @@ test = ["beautifulsoup4 (>=4.8.0)", "coverage (>=4.5.4)", "fixtures (>=3.0.0)", toml = ["tomli (>=1.1.0)"] yaml = ["PyYAML"] +[[package]] +name = "bcrypt" +version = "4.2.0" +description = "Modern password hashing for your software and your servers" +optional = false +python-versions = ">=3.7" +files = [ + {file = "bcrypt-4.2.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:096a15d26ed6ce37a14c1ac1e48119660f21b24cba457f160a4b830f3fe6b5cb"}, + {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c02d944ca89d9b1922ceb8a46460dd17df1ba37ab66feac4870f6862a1533c00"}, + {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d84cf6d877918620b687b8fd1bf7781d11e8a0998f576c7aa939776b512b98d"}, + {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1bb429fedbe0249465cdd85a58e8376f31bb315e484f16e68ca4c786dcc04291"}, + {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:655ea221910bcac76ea08aaa76df427ef8625f92e55a8ee44fbf7753dbabb328"}, + {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:1ee38e858bf5d0287c39b7a1fc59eec64bbf880c7d504d3a06a96c16e14058e7"}, + {file = 
"bcrypt-4.2.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:0da52759f7f30e83f1e30a888d9163a81353ef224d82dc58eb5bb52efcabc399"}, + {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3698393a1b1f1fd5714524193849d0c6d524d33523acca37cd28f02899285060"}, + {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:762a2c5fb35f89606a9fde5e51392dad0cd1ab7ae64149a8b935fe8d79dd5ed7"}, + {file = "bcrypt-4.2.0-cp37-abi3-win32.whl", hash = "sha256:5a1e8aa9b28ae28020a3ac4b053117fb51c57a010b9f969603ed885f23841458"}, + {file = "bcrypt-4.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:8f6ede91359e5df88d1f5c1ef47428a4420136f3ce97763e31b86dd8280fbdf5"}, + {file = "bcrypt-4.2.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:c52aac18ea1f4a4f65963ea4f9530c306b56ccd0c6f8c8da0c06976e34a6e841"}, + {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bbbfb2734f0e4f37c5136130405332640a1e46e6b23e000eeff2ba8d005da68"}, + {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3413bd60460f76097ee2e0a493ccebe4a7601918219c02f503984f0a7ee0aebe"}, + {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8d7bb9c42801035e61c109c345a28ed7e84426ae4865511eb82e913df18f58c2"}, + {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3d3a6d28cb2305b43feac298774b997e372e56c7c7afd90a12b3dc49b189151c"}, + {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:9c1c4ad86351339c5f320ca372dfba6cb6beb25e8efc659bedd918d921956bae"}, + {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:27fe0f57bb5573104b5a6de5e4153c60814c711b29364c10a75a54bb6d7ff48d"}, + {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8ac68872c82f1add6a20bd489870c71b00ebacd2e9134a8aa3f98a0052ab4b0e"}, + {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:cb2a8ec2bc07d3553ccebf0746bbf3d19426d1c6d1adbd4fa48925f66af7b9e8"}, + {file = "bcrypt-4.2.0-cp39-abi3-win32.whl", hash = "sha256:77800b7147c9dc905db1cba26abe31e504d8247ac73580b4aa179f98e6608f34"}, + {file = "bcrypt-4.2.0-cp39-abi3-win_amd64.whl", hash = "sha256:61ed14326ee023917ecd093ee6ef422a72f3aec6f07e21ea5f10622b735538a9"}, + {file = "bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:39e1d30c7233cfc54f5c3f2c825156fe044efdd3e0b9d309512cc514a263ec2a"}, + {file = "bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f4f4acf526fcd1c34e7ce851147deedd4e26e6402369304220250598b26448db"}, + {file = "bcrypt-4.2.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:1ff39b78a52cf03fdf902635e4c81e544714861ba3f0efc56558979dd4f09170"}, + {file = "bcrypt-4.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:373db9abe198e8e2c70d12b479464e0d5092cc122b20ec504097b5f2297ed184"}, + {file = "bcrypt-4.2.0.tar.gz", hash = "sha256:cf69eaf5185fd58f268f805b505ce31f9b9fc2d64b376642164e9244540c1221"}, +] + +[package.extras] +tests = ["pytest (>=3.2.1,!=3.3.0)"] +typecheck = ["mypy"] + [[package]] name = "beautifulsoup4" version = "4.12.2" @@ -6232,6 +6272,27 @@ sql-other = ["SQLAlchemy (>=1.4.36)"] test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.8.0)"] +[[package]] +name = "paramiko" +version = "3.4.1" +description = "SSH2 protocol library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "paramiko-3.4.1-py3-none-any.whl", hash = "sha256:8e49fd2f82f84acf7ffd57c64311aa2b30e575370dc23bdb375b10262f7eac32"}, + {file = "paramiko-3.4.1.tar.gz", hash = "sha256:8b15302870af7f6652f2e038975c1d2973f06046cb5d7d65355668b3ecbece0c"}, +] + +[package.dependencies] +bcrypt = ">=3.2" +cryptography = ">=3.3" +pynacl = ">=1.5" + +[package.extras] +all = ["gssapi (>=1.4.1)", "invoke (>=2.0)", "pyasn1 (>=0.1.7)", "pywin32 (>=2.1.8)"] +gssapi = ["gssapi (>=1.4.1)", "pyasn1 
(>=0.1.7)", "pywin32 (>=2.1.8)"] +invoke = ["invoke (>=2.0)"] + [[package]] name = "parsedatetime" version = "2.4" @@ -7123,6 +7184,32 @@ files = [ ed25519 = ["PyNaCl (>=1.4.0)"] rsa = ["cryptography"] +[[package]] +name = "pynacl" +version = "1.5.0" +description = "Python binding to the Networking and Cryptography (NaCl) library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93"}, + {file = "PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba"}, +] + +[package.dependencies] +cffi = ">=1.4.1" + +[package.extras] +docs = ["sphinx 
(>=1.6.5)", "sphinx-rtd-theme"] +tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] + [[package]] name = "pyodbc" version = "4.0.39" @@ -9684,4 +9771,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "2b8d00f91f33a380b2399989dcac0d1d106d0bd2cd8865c5b7e27a19885753b5" +content-hash = "6f785bc3b710c8752952e233ed848df5aa0cd7edbee73dcfe8fdf045b506cac7" diff --git a/pyproject.toml b/pyproject.toml index d32285572f..ed7d663fc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -155,6 +155,7 @@ pyjwt = "^2.8.0" pytest-mock = "^3.14.0" types-regex = "^2024.5.15.20240519" flake8-print = "^5.0.0" +paramiko = "^3.4.1" [tool.poetry.group.pipeline] optional = true From b398bd8bccb2bacd836308cb94f48e252e3d8c86 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:30:08 +0200 Subject: [PATCH 02/41] test: add container for sftp localhost --- tests/load/filesystem/sftp/Dockerfile | 52 ++++++++++++++++++++++++++ tests/load/filesystem/sftp/foo_rsa | 49 ++++++++++++++++++++++++ tests/load/filesystem/sftp/foo_rsa.pub | 1 + 3 files changed, 102 insertions(+) create mode 100644 tests/load/filesystem/sftp/Dockerfile create mode 100644 tests/load/filesystem/sftp/foo_rsa create mode 100644 tests/load/filesystem/sftp/foo_rsa.pub diff --git a/tests/load/filesystem/sftp/Dockerfile b/tests/load/filesystem/sftp/Dockerfile new file mode 100644 index 0000000000..5d0ec77ec2 --- /dev/null +++ b/tests/load/filesystem/sftp/Dockerfile @@ -0,0 +1,52 @@ +# Use Ubuntu latest as the base image +FROM ubuntu:latest + +# Avoid prompts from apt +ENV DEBIAN_FRONTEND=noninteractive + +# Update packages and install OpenSSH Server and vim +RUN apt-get update && \ + apt-get install -y openssh-server vim && \ + rm -rf /var/lib/apt/lists/* + +# Set up user for SFTP with no shell login +RUN useradd -m -d /home/foo -s /usr/sbin/nologin foo && \ + mkdir -p /home/foo/.ssh && \ + 
chown foo:foo /home/foo/.ssh && \ + chmod 700 /home/foo/.ssh + +# Set password for the user foo +RUN echo 'foo:pass' | chpasswd + +# Copy the public key +# Ensure you replace 'foo_rsa.pub' with your actual public key file name +COPY foo_rsa.pub /home/foo/.ssh/authorized_keys + +# Set permissions for the public key +RUN chmod 600 /home/foo/.ssh/authorized_keys && \ + chown foo:foo /home/foo/.ssh/authorized_keys + +# Create a directory for SFTP that the user will have access to +RUN mkdir -p /home/foo/sftp/data && \ + chown root:root /home/foo /home/foo/sftp && \ + chmod 755 /home/foo /home/foo/sftp && \ + chown foo:foo /home/foo/sftp/data && \ + chmod 755 /home/foo/sftp/data + +# Configure SSH for SFTP +RUN mkdir -p /run/sshd && \ + echo "Match User foo" >> /etc/ssh/sshd_config && \ + echo " ChrootDirectory /home/foo/sftp" >> /etc/ssh/sshd_config && \ + echo " ForceCommand internal-sftp" >> /etc/ssh/sshd_config && \ + echo " PasswordAuthentication yes" >> /etc/ssh/sshd_config && \ + echo " PubkeyAuthentication yes" >> /etc/ssh/sshd_config && \ + echo " PermitTunnel no" >> /etc/ssh/sshd_config && \ + echo " AllowAgentForwarding no" >> /etc/ssh/sshd_config && \ + echo " AllowTcpForwarding no" >> /etc/ssh/sshd_config && \ + echo " X11Forwarding no" >> /etc/ssh/sshd_config + +# Expose the SSH port +EXPOSE 22 + +# Run SSHD on container start +CMD ["/usr/sbin/sshd", "-D", "-e"] diff --git a/tests/load/filesystem/sftp/foo_rsa b/tests/load/filesystem/sftp/foo_rsa new file mode 100644 index 0000000000..9ccc98eccb --- /dev/null +++ b/tests/load/filesystem/sftp/foo_rsa @@ -0,0 +1,49 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn +NhAAAAAwEAAQAAAgEAxz/mzyh9N8KvqW5UVMuKo7reYYyUxU2gGYv/StDs+H3j5UQbrhrA +bwV+R0jgfK4pEAQw2EDDWz5r8pc2LOAq9C+mpha+R6oXZuLwZFCnJOro+peFDEkGVyNReq +Sw2JvKFenRiP2IWqQRtv/zfbcC65TrX4C/DNFuGF/uPjZJH8v4rGN+MtAcrQZDi7V4pvAe +FejIeVumlU5rZ6XN1hOID3BrmybiL8cqcyGTVolYbnKdyaPRmjmXgGBK0vMecJhvbWzVlW 
+7iLnXnaULXFiaKxXlvizxwMampCNMbD6j0+b6DolYDjEj0BRuEWGr2Ox3opqKhWww1eAtj +spY7PpGAoeCq1Mfvn178Bv0sRZykJT4gM4ELOEhmItl6QSFn5kLg8vDRg0j8CHe5scVSVG +3egjYVcNxL/05okSiWIwNLd7Ma5Qyi06BS6JUqqFEDhANIRgMZuztMs1e6IKtiiREeOhf4 +UQLcMsH8r0AYjJPXFTpXvWLamsiAhsL1tGa8XyyxqdJrqsIktrqS4QnCx7uDAPon0wHoRK +YzCwqNmn1Iwf3uepZ7Z0evh3IRRfSI3LaRxfhM0I63z8uPyzx55C3PAkJpmRiPGzFWd7Nk +1P6OgRsjhlSfEK8OwOFXfZLYoRBEYj8yUXRhJu3CdcJViCIJYuhtY38fST3875aTx1KJOq +sAAAdIRs3hz0bN4c8AAAAHc3NoLXJzYQAAAgEAxz/mzyh9N8KvqW5UVMuKo7reYYyUxU2g +GYv/StDs+H3j5UQbrhrAbwV+R0jgfK4pEAQw2EDDWz5r8pc2LOAq9C+mpha+R6oXZuLwZF +CnJOro+peFDEkGVyNReqSw2JvKFenRiP2IWqQRtv/zfbcC65TrX4C/DNFuGF/uPjZJH8v4 +rGN+MtAcrQZDi7V4pvAeFejIeVumlU5rZ6XN1hOID3BrmybiL8cqcyGTVolYbnKdyaPRmj +mXgGBK0vMecJhvbWzVlW7iLnXnaULXFiaKxXlvizxwMampCNMbD6j0+b6DolYDjEj0BRuE +WGr2Ox3opqKhWww1eAtjspY7PpGAoeCq1Mfvn178Bv0sRZykJT4gM4ELOEhmItl6QSFn5k +Lg8vDRg0j8CHe5scVSVG3egjYVcNxL/05okSiWIwNLd7Ma5Qyi06BS6JUqqFEDhANIRgMZ +uztMs1e6IKtiiREeOhf4UQLcMsH8r0AYjJPXFTpXvWLamsiAhsL1tGa8XyyxqdJrqsIktr +qS4QnCx7uDAPon0wHoRKYzCwqNmn1Iwf3uepZ7Z0evh3IRRfSI3LaRxfhM0I63z8uPyzx5 +5C3PAkJpmRiPGzFWd7Nk1P6OgRsjhlSfEK8OwOFXfZLYoRBEYj8yUXRhJu3CdcJViCIJYu +htY38fST3875aTx1KJOqsAAAADAQABAAACAFjd6WXP2zl5fbuF19sSBT3NZM4BU4FEg/mg +9TY7RNX34CMrY2UdzWI3AwFsQaOaUfowxFBPYlJZ3u+N/b26Ja5PanZ9glSYSmO7KBi12D +ahB1RtLAw0rb1DpV2cArw5j8KCTNBas+wpbTU/pywU6hqEiw5Hb+6Zog8BClN5BthFsx3A +KlMjewa42nt/btaWFfUTpAZsmDnThhfuXYXzpCWusG+8wfkpTYeYHAzmqShpunJqvFubAD +VjvTuk75ishFY7ym8hy4OJVrMd+qyIeDBnXxas2CVuVFP5RAKSCuEw1akbz+1LxHpasYZ1 +/miTiGZ59pmTMm3eNpM6aiYX41SFsx7plNIbHG/BYbardc9ZVrZZO/fyh2LHzQ8vGALxHP +ydIUQVKmcWXjFwBVwCZZc83FVyPfdSH3wxK/6MiAAWNDw564d3lrMHZ94n5EX3dKj/+mLB +okG2FTxhDhYGwxCcOoE0YCHHBfxOu08RRpXYLvYPFzrhuHKslJCVsM6BgebBSxlhcJDIZa +NOloE5COuh95byGmMr5DYnimsEXSpJFz4nzscFF/gkoTn1juDOTmhO9D8blFLHOtwCJ3ix +juASf6ydiHTutGHAzOIMXC8K3Tci9rUQtoNMStUypBzAFx7s6Cv62/wqCsWno/Ne/G9eFB +U3bAtyVHGIYuvRujBBAAABAQDrZ8rR3kBxbqaEN9oy9xKn4i7UiB/LRXPjOdmFIH7Lo51g 
+1yQ+jWtEj6nOsFHw+yhei8QdFyI8xxf7+H6PZY6anGpr48hfuE86bcPFT/6VetfjkOSaPb +mfTqckhH0Cye1AH5nmrpgr75E3eVZcg4dt7s8E7R3XIi+qUVYN9ERfwO6AzYS6A3AM144c +u3bG5WxcbhgFdGy6iB26B9UO6+VYvu7HMn0MP+dUU3sdsV+rTQJHT5zA5oIWeRL5BSxnQk +NpuacEyFXiAAMqrBZp97bO0e9dymoMFvbznxgPQcla003PTizLMOnE01USqd4jcE3+F43S +eAdu0k69Hl0tzwv8AAABAQD0l7N0XeJPzcveCB7FSSqTVHxztCpcWdRhul1kQs91Zp/0sN +lwcyAE9ADjC5zsyeAx9Q5TF94HUQ3iuG/aICd6ItOeK06X/r1e3/ole6K7lbSzDnLZ+Fi+ +IvVXCCjRmXMI/m+4+vIXs6y9SZTxmKNm47Wpfd8fumYzfSBL54o5AnKvUYYGiwwuAlKMz1 +rgvRP4ZrzQJZitCh86/jCgRwCDMniu5GGUDXxhvNPoQqeO1ezLV0v9K1SZvd5U0uXnKWop +SLtx2K4s+yyjpxnWyIZmAwjh5paHjLKegXTYskg4n/3fBW9nEJVrFflLVWdzp+xWsz+u3P +sQ3n0efAZPjOWbAAABAQDQitS0R2KAOci5f8LiGy2KtJipcS2jvzBCZX9DqMInfpU/cY92 +bAn8NgY9G8jChloAu2smHo3Fx6LL4ZdFNTjfIBbFyZsuSPhy9czpxWF733mteyuMW6n2jv +e04YoWRp4uh9YxbtpFx24x0RIu64NM69/N2E94eFEJUhpG7NPPgoL41jEqJVJCPVJsQnFT +RCReMb5D9zWYlKVK4xnuB/NqgD+j5iLMHK5hS6Wt0b0olNoOmTlj7IUK62sQyPelU3I3Sy +hmZquXqCILq/rMbJBo5NjhoodvzSFYw+jDvLq4rK+XGL/DgGV080oemXTAP73Er8tBdJq8 +iv8eCatJLFgxAAAAD2Zvb0BleGFtcGxlLmNvbQECAw== +-----END OPENSSH PRIVATE KEY----- diff --git a/tests/load/filesystem/sftp/foo_rsa.pub b/tests/load/filesystem/sftp/foo_rsa.pub new file mode 100644 index 0000000000..212cbb4d7e --- /dev/null +++ b/tests/load/filesystem/sftp/foo_rsa.pub @@ -0,0 +1 @@ +ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQDHP+bPKH03wq+pblRUy4qjut5hjJTFTaAZi/9K0Oz4fePlRBuuGsBvBX5HSOB8rikQBDDYQMNbPmvylzYs4Cr0L6amFr5Hqhdm4vBkUKck6uj6l4UMSQZXI1F6pLDYm8oV6dGI/YhapBG2//N9twLrlOtfgL8M0W4YX+4+Nkkfy/isY34y0BytBkOLtXim8B4V6Mh5W6aVTmtnpc3WE4gPcGubJuIvxypzIZNWiVhucp3Jo9GaOZeAYErS8x5wmG9tbNWVbuIudedpQtcWJorFeW+LPHAxqakI0xsPqPT5voOiVgOMSPQFG4RYavY7HeimoqFbDDV4C2Oyljs+kYCh4KrUx++fXvwG/SxFnKQlPiAzgQs4SGYi2XpBIWfmQuDy8NGDSPwId7mxxVJUbd6CNhVw3Ev/TmiRKJYjA0t3sxrlDKLToFLolSqoUQOEA0hGAxm7O0yzV7ogq2KJER46F/hRAtwywfyvQBiMk9cVOle9YtqayICGwvW0ZrxfLLGp0muqwiS2upLhCcLHu4MA+ifTAehEpjMLCo2afUjB/e56lntnR6+HchFF9IjctpHF+EzQjrfPy4/LPHnkLc8CQmmZGI8bMVZ3s2TU/o6BGyOGVJ8Qrw7A4Vd9ktihEERiPzJRdGEm7cJ1wlWIIgli6G1jfx9JPfzvlpPHUok6qw== foo@example.com From febff5630f1436d6b53aa867bb756d2fd6407e51 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:30:27 +0200 Subject: [PATCH 03/41] chore: add tmp bash scripts --- tests/load/filesystem/sftp/server_build | 3 +++ tests/load/filesystem/sftp/server_connect | 6 ++++++ tests/load/filesystem/sftp/server_run | 3 +++ 3 files changed, 12 insertions(+) create mode 100755 tests/load/filesystem/sftp/server_build create mode 100755 tests/load/filesystem/sftp/server_connect create mode 100755 tests/load/filesystem/sftp/server_run diff --git a/tests/load/filesystem/sftp/server_build b/tests/load/filesystem/sftp/server_build new file mode 100755 index 0000000000..c67eb8b8f0 --- /dev/null +++ b/tests/load/filesystem/sftp/server_build @@ -0,0 +1,3 @@ +#!/bin/bash + +docker build -t my-sftp-server . 
\ No newline at end of file diff --git a/tests/load/filesystem/sftp/server_connect b/tests/load/filesystem/sftp/server_connect new file mode 100755 index 0000000000..8b92b62c46 --- /dev/null +++ b/tests/load/filesystem/sftp/server_connect @@ -0,0 +1,6 @@ +#!/bin/sh + +# sftp client: sftp -i foo_rsa -oPort=2222 foo@localhost + +# python fsspec and paramiko +python connect.py \ No newline at end of file diff --git a/tests/load/filesystem/sftp/server_run b/tests/load/filesystem/sftp/server_run new file mode 100755 index 0000000000..41ebb19243 --- /dev/null +++ b/tests/load/filesystem/sftp/server_run @@ -0,0 +1,3 @@ +#!/bin/bash + +docker run -d --name my_sftp_container -p 2222:22 my-sftp-server \ No newline at end of file From f56dae3abf944515d2d960df3b647b45b21809a2 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:31:04 +0200 Subject: [PATCH 04/41] exp: sftp client with fsspec --- .../dlt-ecosystem/destinations/filesystem.md | 7 +++++ tests/load/filesystem/sftp/__init__.py | 0 tests/load/filesystem/sftp/connect.py | 27 +++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 tests/load/filesystem/sftp/__init__.py create mode 100644 tests/load/filesystem/sftp/connect.py diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md index 018b838363..ad55780084 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md @@ -259,6 +259,13 @@ bucket_url='\\?\UNC\localhost\c$\a\b\c' ``` ::: +### SFTP + +TODO: add explanation and examples +```py +pip install paramiko +``` + ## Write disposition The filesystem destination handles the write dispositions as follows: - `append` - files belonging to such tables are added to the dataset folder diff --git a/tests/load/filesystem/sftp/__init__.py b/tests/load/filesystem/sftp/__init__.py new file mode 
100644 index 0000000000..e69de29bb2 diff --git a/tests/load/filesystem/sftp/connect.py b/tests/load/filesystem/sftp/connect.py new file mode 100644 index 0000000000..afdfb7f376 --- /dev/null +++ b/tests/load/filesystem/sftp/connect.py @@ -0,0 +1,27 @@ +import fsspec + +# fsspec ssh_args available are: +# - hostname, port=22, username=None, password=None, pkey=None, key_filename=None, timeout=None, allow_agent=True, look_for_keys=True, compress=False, sock=None, gss_auth=False, gss_kex=False, gss_deleg_creds=True, gss_host=None, banner_timeout=None, auth_timeout=None, channel_timeout=None, gss_trust_dns=True, passphrase=None, disabled_algorithms=None, transport_factory=None, auth_strategy=None +# - url: https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect + +# Set up connection to the localhost SFTP server: +## 1. using generated ssh key +fs = fsspec.filesystem("sftp", host="localhost", port=2222, username="foo", key_filename="foo_rsa") + +## 2. using linux user and password +# fs = fsspec.filesystem("sftp", host="localhost", port=2222, username="foo", password = "pass") + +# List files on the SFTP server +print(fs.ls("/data")) + +# Write data to a file +with fs.open("/data/hello.txt", "w") as f: + f.write("This is a new file added via SFTP!") + +# Read data from the file +with fs.open("/data/hello.txt", "r") as f: + data = f.read() + print(data) + +# List files on the SFTP server +print(fs.ls("/data")) From 9d6bf15a4949765df046ab4c18f7f806ab8f682d Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:48:43 +0200 Subject: [PATCH 05/41] chore: sftp timestamp metadata discovered --- tests/load/filesystem/sftp/connect.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/load/filesystem/sftp/connect.py b/tests/load/filesystem/sftp/connect.py index afdfb7f376..eb29895335 100644 --- a/tests/load/filesystem/sftp/connect.py +++ 
b/tests/load/filesystem/sftp/connect.py @@ -14,14 +14,19 @@ # List files on the SFTP server print(fs.ls("/data")) +example = "/data/hello.txt" + # Write data to a file -with fs.open("/data/hello.txt", "w") as f: +with fs.open(example, "w") as f: f.write("This is a new file added via SFTP!") # Read data from the file -with fs.open("/data/hello.txt", "r") as f: +with fs.open(example, "r") as f: data = f.read() print(data) +# Check file metada -> confirmed! field mtime (modification time) available +print(fs.info(example)) + # List files on the SFTP server print(fs.ls("/data")) From 6eb40b4e2e068ad2941f55747afd0e73ce0d8156 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:51:30 +0200 Subject: [PATCH 06/41] fix: docs lint --- docs/website/docs/dlt-ecosystem/destinations/filesystem.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md index ad55780084..03f6a86337 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md @@ -262,7 +262,8 @@ bucket_url='\\?\UNC\localhost\c$\a\b\c' ### SFTP TODO: add explanation and examples -```py + +```sh pip install paramiko ``` From e690a70ddfe41f835cc4f82ea25778988dd0c877 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 29 Aug 2024 20:05:27 +0200 Subject: [PATCH 07/41] feat: add fsspec protocol sftp --- dlt/common/configuration/specs/__init__.py | 2 ++ .../configuration/specs/sftp_crendentials.py | 29 +++++++++++++++++++ dlt/common/storages/configuration.py | 9 +++++- dlt/common/storages/fsspec_filesystem.py | 7 +++++ 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 dlt/common/configuration/specs/sftp_crendentials.py diff --git a/dlt/common/configuration/specs/__init__.py 
b/dlt/common/configuration/specs/__init__.py index f1d7d819ff..179445dde3 100644 --- a/dlt/common/configuration/specs/__init__.py +++ b/dlt/common/configuration/specs/__init__.py @@ -28,6 +28,7 @@ AnyAzureCredentials, ) +from .sftp_crendentials import SFTPCredentials # backward compatibility for service account credentials from .gcp_credentials import ( @@ -62,4 +63,5 @@ "AnyAzureCredentials", "GcpClientCredentials", "GcpClientCredentialsWithDefault", + "SFTPCredentials", ] diff --git a/dlt/common/configuration/specs/sftp_crendentials.py b/dlt/common/configuration/specs/sftp_crendentials.py new file mode 100644 index 0000000000..8b153281e4 --- /dev/null +++ b/dlt/common/configuration/specs/sftp_crendentials.py @@ -0,0 +1,29 @@ +from typing import Any, Dict, Optional + +from dlt.common.typing import TSecretValue +from dlt.common.configuration.specs.base_configuration import CredentialsConfiguration, configspec + + +@configspec +class SFTPCredentials(CredentialsConfiguration): + # TODO: separate config and secrets + sftp_port: TSecretValue = None + sftp_username: TSecretValue = None + sftp_password: Optional[TSecretValue] = None + sftp_key_filename: Optional[TSecretValue] = None # path to the private key file + sftp_key_passphrase: Optional[TSecretValue] = None # passphrase for the private key + + def to_fsspec_credentials(self) -> Dict[str, Any]: + """Return a dict that can be passed to fsspec/sftp""" + + # fsspec/sftp (ssh_args) args: + # - hostname, port=22, username=None, password=None, pkey=None, key_filename=None, timeout=None, allow_agent=True, look_for_keys=True, compress=False, sock=None, gss_auth=False, gss_kex=False, gss_deleg_creds=True, gss_host=None, banner_timeout=None, auth_timeout=None, channel_timeout=None, gss_trust_dns=True, passphrase=None, disabled_algorithms=None, transport_factory=None, auth_strategy=None + # link: https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect + + return dict( + port=self.sftp_port, + 
username=self.sftp_username, + password=self.sftp_password, + key_filename=self.sftp_key_filename, + passphrase=self.sftp_key_passphrase, + ) diff --git a/dlt/common/storages/configuration.py b/dlt/common/storages/configuration.py index 04780528c4..1a0c0f5c4f 100644 --- a/dlt/common/storages/configuration.py +++ b/dlt/common/storages/configuration.py @@ -12,6 +12,7 @@ GcpOAuthCredentials, AnyAzureCredentials, BaseConfiguration, + SFTPCredentials, ) from dlt.common.typing import DictStrAny from dlt.common.utils import digest128 @@ -48,7 +49,11 @@ class LoadStorageConfiguration(BaseConfiguration): FileSystemCredentials = Union[ - AwsCredentials, GcpServiceAccountCredentials, AnyAzureCredentials, GcpOAuthCredentials + AwsCredentials, + GcpServiceAccountCredentials, + AnyAzureCredentials, + GcpOAuthCredentials, + SFTPCredentials, ] @@ -109,6 +114,7 @@ class FilesystemConfiguration(BaseConfiguration): * az, abfs, adl, abfss, azure * file, memory * gdrive + * sftp """ PROTOCOL_CREDENTIALS: ClassVar[Dict[str, Any]] = { @@ -121,6 +127,7 @@ class FilesystemConfiguration(BaseConfiguration): "adl": AnyAzureCredentials, "abfss": AnyAzureCredentials, "azure": AnyAzureCredentials, + "sftp": SFTPCredentials, } bucket_url: str = None diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index 7da5ebabef..4e1fb72ed1 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -30,6 +30,7 @@ GcpCredentials, AwsCredentials, AzureCredentials, + SFTPCredentials, ) from dlt.common.exceptions import MissingDependencyException from dlt.common.storages.configuration import ( @@ -64,6 +65,7 @@ class FileItem(TypedDict, total=False): "file": lambda f: ensure_pendulum_datetime(f["mtime"]), "memory": lambda f: ensure_pendulum_datetime(f["created"]), "gdrive": lambda f: ensure_pendulum_datetime(f["modifiedTime"]), + "sftp": lambda f: ensure_pendulum_datetime(f["mtime"]), } # Support aliases 
MTIME_DISPATCH["gs"] = MTIME_DISPATCH["gcs"] @@ -77,6 +79,7 @@ class FileItem(TypedDict, total=False): "az": lambda config: cast(AzureCredentials, config.credentials).to_adlfs_credentials(), "gs": lambda config: cast(GcpCredentials, config.credentials).to_gcs_credentials(), "gdrive": lambda config: {"credentials": cast(GcpCredentials, config.credentials)}, + "sftp": lambda config: cast(SFTPCredentials, config.credentials).to_fsspec_credentials(), } CREDENTIALS_DISPATCH["adl"] = CREDENTIALS_DISPATCH["az"] CREDENTIALS_DISPATCH["abfs"] = CREDENTIALS_DISPATCH["az"] @@ -125,6 +128,9 @@ def prepare_fsspec_args(config: FilesystemConfiguration) -> DictStrAny: register_implementation("gdrive", GoogleDriveFileSystem, "GoogleDriveFileSystem") + if protocol == "sftp": + fs_kwargs.clear() + if config.kwargs is not None: fs_kwargs.update(config.kwargs) if config.client_kwargs is not None: @@ -144,6 +150,7 @@ def fsspec_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSys * s3 * az, abfs, abfss, adl, azure * gcs, gs + * sftp All other filesystems are not authenticated From 89b097b47ca2d059f8a72abd4c27726e0b003355 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 29 Aug 2024 20:20:02 +0200 Subject: [PATCH 08/41] fix: lint errors from devel --- dlt/common/libs/deltalake.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlt/common/libs/deltalake.py b/dlt/common/libs/deltalake.py index 38b23ea27a..92f222a4c4 100644 --- a/dlt/common/libs/deltalake.py +++ b/dlt/common/libs/deltalake.py @@ -174,7 +174,7 @@ def get_delta_tables( def _deltalake_storage_options(config: FilesystemConfiguration) -> Dict[str, str]: """Returns dict that can be passed as `storage_options` in `deltalake` library.""" - creds = {} + creds = {} # type: ignore extra_options = {} # TODO: create a mixin with to_object_store_rs_credentials for a proper discovery if hasattr(config.credentials, "to_object_store_rs_credentials"): From 
5fd34befd836316bb79c696942928947e0ed0083 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Mon, 2 Sep 2024 18:01:52 +0200 Subject: [PATCH 09/41] test: sftp server localhost --- .../test_destination_filesystem_sftp.yml | 83 ++++++++++++++++++ tests/load/filesystem/sftp/connect.py | 32 ------- tests/load/filesystem/sftp/server_build | 3 - tests/load/filesystem/sftp/server_connect | 6 -- tests/load/filesystem/sftp/server_run | 3 - .../sftp => filesystem_sftp}/Dockerfile | 0 .../sftp => filesystem_sftp}/__init__.py | 0 tests/load/filesystem_sftp/docker-compose.yml | 11 +++ .../sftp => filesystem_sftp}/foo_rsa | 0 .../sftp => filesystem_sftp}/foo_rsa.pub | 0 .../filesystem_sftp/test_filesystem_sftp.py | 85 +++++++++++++++++++ 11 files changed, 179 insertions(+), 44 deletions(-) create mode 100644 .github/workflows/test_destination_filesystem_sftp.yml delete mode 100644 tests/load/filesystem/sftp/connect.py delete mode 100755 tests/load/filesystem/sftp/server_build delete mode 100755 tests/load/filesystem/sftp/server_connect delete mode 100755 tests/load/filesystem/sftp/server_run rename tests/load/{filesystem/sftp => filesystem_sftp}/Dockerfile (100%) rename tests/load/{filesystem/sftp => filesystem_sftp}/__init__.py (100%) create mode 100644 tests/load/filesystem_sftp/docker-compose.yml rename tests/load/{filesystem/sftp => filesystem_sftp}/foo_rsa (100%) rename tests/load/{filesystem/sftp => filesystem_sftp}/foo_rsa.pub (100%) create mode 100644 tests/load/filesystem_sftp/test_filesystem_sftp.py diff --git a/.github/workflows/test_destination_filesystem_sftp.yml b/.github/workflows/test_destination_filesystem_sftp.yml new file mode 100644 index 0000000000..f257962239 --- /dev/null +++ b/.github/workflows/test_destination_filesystem_sftp.yml @@ -0,0 +1,83 @@ + +name: test | filesystem sftp + +on: + pull_request: + branches: + - master + - devel + workflow_dispatch: + schedule: + - cron: '0 2 * * *' + +concurrency: + group: 
${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + RUNTIME__SENTRY_DSN: https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752 + RUNTIME__LOG_LEVEL: ERROR + RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }} + + ACTIVE_DESTINATIONS: "[\"filesystem\"]" + ALL_FILESYSTEM_DRIVERS: "[\"memory\"]" + +jobs: + get_docs_changes: + name: docs changes + uses: ./.github/workflows/get_docs_changes.yml + if: ${{ !github.event.pull_request.head.repo.fork || contains(github.event.pull_request.labels.*.name, 'ci from fork')}} + + run_loader: + name: test | filesystem sftp tests + needs: get_docs_changes + if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' + defaults: + run: + shell: bash + runs-on: "ubuntu-latest" + + steps: + + - name: Check out + uses: actions/checkout@master + + - name: Start SFTP server + run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.10.x" + + - name: Install Poetry + uses: snok/install-poetry@v1.3.2 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp + + - name: Install dependencies + run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline + + - run: | + poetry run pytest tests/load/filesystem_sftp + if: runner.os != 'Windows' + name: Run tests Linux/MAC + + - run: | + poetry run pytest tests/load/filesystem_sftp + if: runner.os == 'Windows' + name: Run tests Windows + shell: cmd + + - name: Stop SFTP server + if: always() + run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" down 
-v diff --git a/tests/load/filesystem/sftp/connect.py b/tests/load/filesystem/sftp/connect.py deleted file mode 100644 index eb29895335..0000000000 --- a/tests/load/filesystem/sftp/connect.py +++ /dev/null @@ -1,32 +0,0 @@ -import fsspec - -# fsspec ssh_args available are: -# - hostname, port=22, username=None, password=None, pkey=None, key_filename=None, timeout=None, allow_agent=True, look_for_keys=True, compress=False, sock=None, gss_auth=False, gss_kex=False, gss_deleg_creds=True, gss_host=None, banner_timeout=None, auth_timeout=None, channel_timeout=None, gss_trust_dns=True, passphrase=None, disabled_algorithms=None, transport_factory=None, auth_strategy=None -# - url: https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect - -# Set up connection to the localhost SFTP server: -## 1. using generated ssh key -fs = fsspec.filesystem("sftp", host="localhost", port=2222, username="foo", key_filename="foo_rsa") - -## 2. using linux user and password -# fs = fsspec.filesystem("sftp", host="localhost", port=2222, username="foo", password = "pass") - -# List files on the SFTP server -print(fs.ls("/data")) - -example = "/data/hello.txt" - -# Write data to a file -with fs.open(example, "w") as f: - f.write("This is a new file added via SFTP!") - -# Read data from the file -with fs.open(example, "r") as f: - data = f.read() - print(data) - -# Check file metada -> confirmed! field mtime (modification time) available -print(fs.info(example)) - -# List files on the SFTP server -print(fs.ls("/data")) diff --git a/tests/load/filesystem/sftp/server_build b/tests/load/filesystem/sftp/server_build deleted file mode 100755 index c67eb8b8f0..0000000000 --- a/tests/load/filesystem/sftp/server_build +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -docker build -t my-sftp-server . 
\ No newline at end of file diff --git a/tests/load/filesystem/sftp/server_connect b/tests/load/filesystem/sftp/server_connect deleted file mode 100755 index 8b92b62c46..0000000000 --- a/tests/load/filesystem/sftp/server_connect +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -# sftp client: sftp -i foo_rsa -oPort=2222 foo@localhost - -# python fsspec and paramiko -python connect.py \ No newline at end of file diff --git a/tests/load/filesystem/sftp/server_run b/tests/load/filesystem/sftp/server_run deleted file mode 100755 index 41ebb19243..0000000000 --- a/tests/load/filesystem/sftp/server_run +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -docker run -d --name my_sftp_container -p 2222:22 my-sftp-server \ No newline at end of file diff --git a/tests/load/filesystem/sftp/Dockerfile b/tests/load/filesystem_sftp/Dockerfile similarity index 100% rename from tests/load/filesystem/sftp/Dockerfile rename to tests/load/filesystem_sftp/Dockerfile diff --git a/tests/load/filesystem/sftp/__init__.py b/tests/load/filesystem_sftp/__init__.py similarity index 100% rename from tests/load/filesystem/sftp/__init__.py rename to tests/load/filesystem_sftp/__init__.py diff --git a/tests/load/filesystem_sftp/docker-compose.yml b/tests/load/filesystem_sftp/docker-compose.yml new file mode 100644 index 0000000000..4dd9ae58b6 --- /dev/null +++ b/tests/load/filesystem_sftp/docker-compose.yml @@ -0,0 +1,11 @@ +version: '3.8' + +services: + + sftpserver: + build: + context: . 
+ dockerfile: Dockerfile + image: sftpserver:latest + ports: + - "2222:22" diff --git a/tests/load/filesystem/sftp/foo_rsa b/tests/load/filesystem_sftp/foo_rsa similarity index 100% rename from tests/load/filesystem/sftp/foo_rsa rename to tests/load/filesystem_sftp/foo_rsa diff --git a/tests/load/filesystem/sftp/foo_rsa.pub b/tests/load/filesystem_sftp/foo_rsa.pub similarity index 100% rename from tests/load/filesystem/sftp/foo_rsa.pub rename to tests/load/filesystem_sftp/foo_rsa.pub diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py new file mode 100644 index 0000000000..ce0af04500 --- /dev/null +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -0,0 +1,85 @@ +import pytest +import os +import fsspec +from dlt.common.json import json +import dlt +from dlt.destinations import filesystem +from tests.common.configuration.utils import environment +from dlt.destinations.impl.filesystem.filesystem import FilesystemClient + + +@pytest.fixture(scope="module") +def sftp_filesystem(): + # Determine the path to the SSH key relative to this Python file + current_dir = os.path.dirname(os.path.abspath(__file__)) + key_path = os.path.join(current_dir, "foo_rsa") + + # Set up the SFTP filesystem connection using the relative SSH key path + fs = fsspec.filesystem( + "sftp", host="localhost", port=2222, username="foo", key_filename=key_path + ) + yield fs + + +def test_sftp_server(sftp_filesystem): + fs = sftp_filesystem + test_file = "/data/countries.json" + json_data = { + "countries": [ + {"name": "United States", "code": "US"}, + {"name": "Canada", "code": "CA"}, + {"name": "Mexico", "code": "MX"}, + ] + } + try: + with fs.open(test_file, "w") as f: + json.dump(json_data, f) + + files = fs.ls("/data") + assert test_file in files + + with fs.open(test_file, "r") as f: + data = json.load(f) + assert data == json_data + + info = fs.info(test_file) + assert "mtime" in info + assert info["type"] == "file" + + 
finally: + fs.rm(test_file) + + +def test_pipeline_filesystem_sftp_destination(sftp_filesystem, environment): + environment["DESTINATION__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data" + environment["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" + environment["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" + environment["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "pass" + + @dlt.resource() + def states(): + yield [{"id": 1, "name": "DE"}, {"id": 2, "name": "AK"}, {"id": 3, "name": "CA"}] + + # f = filesystem(bucket_url="sftp://localhost/data") + pipeline = dlt.pipeline(destination="filesystem", dataset_name="test") + + pipeline.run([states], loader_file_format="parquet") + + client: FilesystemClient = pipeline.destination_client() # type: ignore[assignment] + + import posixpath + + data_glob = posixpath.join(client.dataset_path, "states/*") + data_files = client.fs_client.glob(data_glob) + assert len(data_files) > 0 + + fs = sftp_filesystem + with fs.open(data_files[0], "rb") as f: + import pyarrow.parquet as pq + + rows = pq.read_table(f).to_pylist() + + result_states = [r["name"] for r in rows] + + expected_states = ["DE", "AK", "CA"] + assert sorted(result_states) == sorted(expected_states) From 4091119b955893e445886123e0934fd49ab34942 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Mon, 2 Sep 2024 20:28:28 +0200 Subject: [PATCH 10/41] fix: filesystem SFTP docker-compose tests --- .../test_destination_filesystem_sftp.yml | 2 +- .../configuration/specs/sftp_crendentials.py | 19 +++++++++++-------- tests/load/filesystem_sftp/__init__.py | 3 +++ .../{ => bootstrap}/Dockerfile | 0 .../filesystem_sftp/{ => bootstrap}/foo_rsa | 0 .../{ => bootstrap}/foo_rsa.pub | 0 tests/load/filesystem_sftp/docker-compose.yml | 8 +++++++- .../filesystem_sftp/test_filesystem_sftp.py | 19 ++++++++++--------- tests/utils.py | 2 ++ 9 files changed, 34 insertions(+), 19 deletions(-) rename 
tests/load/filesystem_sftp/{ => bootstrap}/Dockerfile (100%) rename tests/load/filesystem_sftp/{ => bootstrap}/foo_rsa (100%) rename tests/load/filesystem_sftp/{ => bootstrap}/foo_rsa.pub (100%) diff --git a/.github/workflows/test_destination_filesystem_sftp.yml b/.github/workflows/test_destination_filesystem_sftp.yml index f257962239..74a2fdf626 100644 --- a/.github/workflows/test_destination_filesystem_sftp.yml +++ b/.github/workflows/test_destination_filesystem_sftp.yml @@ -19,7 +19,7 @@ env: RUNTIME__LOG_LEVEL: ERROR RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }} - ACTIVE_DESTINATIONS: "[\"filesystem\"]" + ACTIVE_DESTINATIONS: "[\"filesystem-sftp\"]" ALL_FILESYSTEM_DRIVERS: "[\"memory\"]" jobs: diff --git a/dlt/common/configuration/specs/sftp_crendentials.py b/dlt/common/configuration/specs/sftp_crendentials.py index 8b153281e4..4ff36efaa3 100644 --- a/dlt/common/configuration/specs/sftp_crendentials.py +++ b/dlt/common/configuration/specs/sftp_crendentials.py @@ -1,17 +1,19 @@ from typing import Any, Dict, Optional -from dlt.common.typing import TSecretValue +from dlt.common.typing import TSecretValue, TSecretStrValue, DictStrAny from dlt.common.configuration.specs.base_configuration import CredentialsConfiguration, configspec @configspec class SFTPCredentials(CredentialsConfiguration): + """Credentials for SFTP filesystem, compatible with fsspec/sftp""" + # TODO: separate config and secrets - sftp_port: TSecretValue = None - sftp_username: TSecretValue = None - sftp_password: Optional[TSecretValue] = None - sftp_key_filename: Optional[TSecretValue] = None # path to the private key file - sftp_key_passphrase: Optional[TSecretValue] = None # passphrase for the private key + sftp_port: str = None + sftp_username: str = None + sftp_password: Optional[TSecretStrValue] = None + sftp_key_filename: Optional[TSecretStrValue] = None # path to the private key file + sftp_key_passphrase: Optional[TSecretStrValue] = None # passphrase 
for the private key def to_fsspec_credentials(self) -> Dict[str, Any]: """Return a dict that can be passed to fsspec/sftp""" @@ -19,11 +21,12 @@ def to_fsspec_credentials(self) -> Dict[str, Any]: # fsspec/sftp (ssh_args) args: # - hostname, port=22, username=None, password=None, pkey=None, key_filename=None, timeout=None, allow_agent=True, look_for_keys=True, compress=False, sock=None, gss_auth=False, gss_kex=False, gss_deleg_creds=True, gss_host=None, banner_timeout=None, auth_timeout=None, channel_timeout=None, gss_trust_dns=True, passphrase=None, disabled_algorithms=None, transport_factory=None, auth_strategy=None # link: https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect - - return dict( + credentials: DictStrAny = dict( port=self.sftp_port, username=self.sftp_username, password=self.sftp_password, key_filename=self.sftp_key_filename, passphrase=self.sftp_key_passphrase, ) + + return credentials diff --git a/tests/load/filesystem_sftp/__init__.py b/tests/load/filesystem_sftp/__init__.py index e69de29bb2..c5e215e0c5 100644 --- a/tests/load/filesystem_sftp/__init__.py +++ b/tests/load/filesystem_sftp/__init__.py @@ -0,0 +1,3 @@ +from tests.utils import skip_if_not_active + +skip_if_not_active("filesystem-sftp") diff --git a/tests/load/filesystem_sftp/Dockerfile b/tests/load/filesystem_sftp/bootstrap/Dockerfile similarity index 100% rename from tests/load/filesystem_sftp/Dockerfile rename to tests/load/filesystem_sftp/bootstrap/Dockerfile diff --git a/tests/load/filesystem_sftp/foo_rsa b/tests/load/filesystem_sftp/bootstrap/foo_rsa similarity index 100% rename from tests/load/filesystem_sftp/foo_rsa rename to tests/load/filesystem_sftp/bootstrap/foo_rsa diff --git a/tests/load/filesystem_sftp/foo_rsa.pub b/tests/load/filesystem_sftp/bootstrap/foo_rsa.pub similarity index 100% rename from tests/load/filesystem_sftp/foo_rsa.pub rename to tests/load/filesystem_sftp/bootstrap/foo_rsa.pub diff --git 
a/tests/load/filesystem_sftp/docker-compose.yml b/tests/load/filesystem_sftp/docker-compose.yml index 4dd9ae58b6..fc692ff616 100644 --- a/tests/load/filesystem_sftp/docker-compose.yml +++ b/tests/load/filesystem_sftp/docker-compose.yml @@ -4,8 +4,14 @@ services: sftpserver: build: - context: . + context: bootstrap dockerfile: Dockerfile image: sftpserver:latest + networks: + - sftpserver ports: - "2222:22" + +networks: + sftpserver: + name: sftpserver diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index ce0af04500..4ea5a516cf 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -1,18 +1,18 @@ import pytest import os import fsspec -from dlt.common.json import json +import json +import os import dlt -from dlt.destinations import filesystem -from tests.common.configuration.utils import environment from dlt.destinations.impl.filesystem.filesystem import FilesystemClient +import logging @pytest.fixture(scope="module") def sftp_filesystem(): # Determine the path to the SSH key relative to this Python file current_dir = os.path.dirname(os.path.abspath(__file__)) - key_path = os.path.join(current_dir, "foo_rsa") + key_path = os.path.join(current_dir, "bootstrap/foo_rsa") # Set up the SFTP filesystem connection using the relative SSH key path fs = fsspec.filesystem( @@ -31,6 +31,7 @@ def test_sftp_server(sftp_filesystem): {"name": "Mexico", "code": "MX"}, ] } + try: with fs.open(test_file, "w") as f: json.dump(json_data, f) @@ -50,11 +51,11 @@ def test_sftp_server(sftp_filesystem): fs.rm(test_file) -def test_pipeline_filesystem_sftp_destination(sftp_filesystem, environment): - environment["DESTINATION__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data" - environment["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" - environment["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" - 
environment["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "pass" +def test_pipeline_filesystem_sftp_destination(sftp_filesystem): + os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data" + os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" + os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" + os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "pass" @dlt.resource() def states(): diff --git a/tests/utils.py b/tests/utils.py index 1b81881470..6ba5771d3d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -40,6 +40,7 @@ "postgres", "snowflake", "filesystem", + "filesystem-sftp", "weaviate", "dummy", "motherduck", @@ -54,6 +55,7 @@ } NON_SQL_DESTINATIONS = { "filesystem", + "filesystem-sftp", "weaviate", "dummy", "qdrant", From a1e5d66f9e32adbe10f24eaa3604c6fd13a9d7ba Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Mon, 2 Sep 2024 20:34:03 +0200 Subject: [PATCH 11/41] fix: json import --- tests/load/filesystem_sftp/test_filesystem_sftp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index 4ea5a516cf..8e111e01bb 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -1,7 +1,7 @@ import pytest import os import fsspec -import json +from dlt.common.json import json import os import dlt from dlt.destinations.impl.filesystem.filesystem import FilesystemClient @@ -34,7 +34,7 @@ def test_sftp_server(sftp_filesystem): try: with fs.open(test_file, "w") as f: - json.dump(json_data, f) + f.write(json.dumps(json_data)) files = fs.ls("/data") assert test_file in files From c7b5072bb3053fda8307840b0041206cbdbc5197 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Tue, 3 Sep 2024 09:41:27 +0200 Subject: 
[PATCH 12/41] chore: clean tests and dockerfile --- .../load/filesystem_sftp/bootstrap/Dockerfile | 22 +++++------- .../filesystem_sftp/test_filesystem_sftp.py | 34 +++++++------------ 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/tests/load/filesystem_sftp/bootstrap/Dockerfile b/tests/load/filesystem_sftp/bootstrap/Dockerfile index 5d0ec77ec2..a04027cbac 100644 --- a/tests/load/filesystem_sftp/bootstrap/Dockerfile +++ b/tests/load/filesystem_sftp/bootstrap/Dockerfile @@ -1,39 +1,34 @@ -# Use Ubuntu latest as the base image FROM ubuntu:latest -# Avoid prompts from apt ENV DEBIAN_FRONTEND=noninteractive -# Update packages and install OpenSSH Server and vim +# update packages and install openssh server RUN apt-get update && \ - apt-get install -y openssh-server vim && \ + apt-get install -y openssh-server && \ rm -rf /var/lib/apt/lists/* -# Set up user for SFTP with no shell login +# set up user for SFTP with no shell login RUN useradd -m -d /home/foo -s /usr/sbin/nologin foo && \ mkdir -p /home/foo/.ssh && \ chown foo:foo /home/foo/.ssh && \ chmod 700 /home/foo/.ssh -# Set password for the user foo +# set password for the user foo RUN echo 'foo:pass' | chpasswd -# Copy the public key -# Ensure you replace 'foo_rsa.pub' with your actual public key file name +# copy the public key and set permissions COPY foo_rsa.pub /home/foo/.ssh/authorized_keys - -# Set permissions for the public key RUN chmod 600 /home/foo/.ssh/authorized_keys && \ chown foo:foo /home/foo/.ssh/authorized_keys -# Create a directory for SFTP that the user will have access to +# create a directory for SFTP that the user will have access to RUN mkdir -p /home/foo/sftp/data && \ chown root:root /home/foo /home/foo/sftp && \ chmod 755 /home/foo /home/foo/sftp && \ chown foo:foo /home/foo/sftp/data && \ chmod 755 /home/foo/sftp/data -# Configure SSH for SFTP +# configure SSH for SFTP: allow password and pubkey authentication RUN mkdir -p /run/sshd && \ echo "Match User foo" >> 
/etc/ssh/sshd_config && \ echo " ChrootDirectory /home/foo/sftp" >> /etc/ssh/sshd_config && \ @@ -45,8 +40,7 @@ RUN mkdir -p /run/sshd && \ echo " AllowTcpForwarding no" >> /etc/ssh/sshd_config && \ echo " X11Forwarding no" >> /etc/ssh/sshd_config -# Expose the SSH port EXPOSE 22 -# Run SSHD on container start +# run sshd on container start CMD ["/usr/sbin/sshd", "-D", "-e"] diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index 8e111e01bb..9d9c8e1c0a 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -1,30 +1,27 @@ -import pytest import os +import pytest import fsspec -from dlt.common.json import json -import os import dlt + +from dlt.common.json import json from dlt.destinations.impl.filesystem.filesystem import FilesystemClient -import logging @pytest.fixture(scope="module") def sftp_filesystem(): - # Determine the path to the SSH key relative to this Python file + # path to the private key current_dir = os.path.dirname(os.path.abspath(__file__)) key_path = os.path.join(current_dir, "bootstrap/foo_rsa") - # Set up the SFTP filesystem connection using the relative SSH key path fs = fsspec.filesystem( "sftp", host="localhost", port=2222, username="foo", key_filename=key_path ) yield fs -def test_sftp_server(sftp_filesystem): - fs = sftp_filesystem +def test_filesystem_sftp_server(sftp_filesystem): test_file = "/data/countries.json" - json_data = { + input_data = { "countries": [ {"name": "United States", "code": "US"}, {"name": "Canada", "code": "CA"}, @@ -32,26 +29,29 @@ def test_sftp_server(sftp_filesystem): ] } + fs = sftp_filesystem try: with fs.open(test_file, "w") as f: - f.write(json.dumps(json_data)) + f.write(json.dumps(input_data)) files = fs.ls("/data") assert test_file in files with fs.open(test_file, "r") as f: data = json.load(f) - assert data == json_data + assert data == input_data info = fs.info(test_file) 
assert "mtime" in info - assert info["type"] == "file" finally: fs.rm(test_file) -def test_pipeline_filesystem_sftp_destination(sftp_filesystem): +def test_filesystem_sftp_pipeline(sftp_filesystem): + import posixpath + import pyarrow.parquet as pq + os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data" os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" @@ -61,25 +61,17 @@ def test_pipeline_filesystem_sftp_destination(sftp_filesystem): def states(): yield [{"id": 1, "name": "DE"}, {"id": 2, "name": "AK"}, {"id": 3, "name": "CA"}] - # f = filesystem(bucket_url="sftp://localhost/data") pipeline = dlt.pipeline(destination="filesystem", dataset_name="test") - pipeline.run([states], loader_file_format="parquet") client: FilesystemClient = pipeline.destination_client() # type: ignore[assignment] - - import posixpath - data_glob = posixpath.join(client.dataset_path, "states/*") data_files = client.fs_client.glob(data_glob) assert len(data_files) > 0 fs = sftp_filesystem with fs.open(data_files[0], "rb") as f: - import pyarrow.parquet as pq - rows = pq.read_table(f).to_pylist() - result_states = [r["name"] for r in rows] expected_states = ["DE", "AK", "CA"] From 2b51113120d76dd649777191d7cc60669883c613 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Tue, 3 Sep 2024 09:51:42 +0200 Subject: [PATCH 13/41] refactor: ci test exec for sftp server --- .../test_destination_filesystem_sftp.yml | 83 ------------------- .github/workflows/test_local_destinations.yml | 7 ++ tests/load/filesystem_sftp/__init__.py | 2 +- tests/utils.py | 2 - 4 files changed, 8 insertions(+), 86 deletions(-) delete mode 100644 .github/workflows/test_destination_filesystem_sftp.yml diff --git a/.github/workflows/test_destination_filesystem_sftp.yml b/.github/workflows/test_destination_filesystem_sftp.yml deleted file mode 100644 index 
74a2fdf626..0000000000 --- a/.github/workflows/test_destination_filesystem_sftp.yml +++ /dev/null @@ -1,83 +0,0 @@ - -name: test | filesystem sftp - -on: - pull_request: - branches: - - master - - devel - workflow_dispatch: - schedule: - - cron: '0 2 * * *' - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -env: - RUNTIME__SENTRY_DSN: https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752 - RUNTIME__LOG_LEVEL: ERROR - RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }} - - ACTIVE_DESTINATIONS: "[\"filesystem-sftp\"]" - ALL_FILESYSTEM_DRIVERS: "[\"memory\"]" - -jobs: - get_docs_changes: - name: docs changes - uses: ./.github/workflows/get_docs_changes.yml - if: ${{ !github.event.pull_request.head.repo.fork || contains(github.event.pull_request.labels.*.name, 'ci from fork')}} - - run_loader: - name: test | filesystem sftp tests - needs: get_docs_changes - if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' - defaults: - run: - shell: bash - runs-on: "ubuntu-latest" - - steps: - - - name: Check out - uses: actions/checkout@master - - - name: Start SFTP server - run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d - - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: "3.10.x" - - - name: Install Poetry - uses: snok/install-poetry@v1.3.2 - with: - virtualenvs-create: true - virtualenvs-in-project: true - installer-parallel: true - - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v3 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp - - - name: Install dependencies - run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline - - - run: | - poetry run pytest tests/load/filesystem_sftp - if: runner.os != 
'Windows' - name: Run tests Linux/MAC - - - run: | - poetry run pytest tests/load/filesystem_sftp - if: runner.os == 'Windows' - name: Run tests Windows - shell: cmd - - - name: Stop SFTP server - if: always() - run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" down -v diff --git a/.github/workflows/test_local_destinations.yml b/.github/workflows/test_local_destinations.yml index 78ea23ec1c..bf159afeb0 100644 --- a/.github/workflows/test_local_destinations.yml +++ b/.github/workflows/test_local_destinations.yml @@ -74,6 +74,9 @@ jobs: - name: Start weaviate run: docker compose -f ".github/weaviate-compose.yml" up -d + + - name: Start SFTP server + run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d - name: Setup Python uses: actions/setup-python@v4 @@ -110,3 +113,7 @@ jobs: - name: Stop weaviate if: always() run: docker compose -f ".github/weaviate-compose.yml" down -v + + - name: Stop SFTP server + if: always() + run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" down -v diff --git a/tests/load/filesystem_sftp/__init__.py b/tests/load/filesystem_sftp/__init__.py index c5e215e0c5..0d23f8002b 100644 --- a/tests/load/filesystem_sftp/__init__.py +++ b/tests/load/filesystem_sftp/__init__.py @@ -1,3 +1,3 @@ from tests.utils import skip_if_not_active -skip_if_not_active("filesystem-sftp") +skip_if_not_active("filesystem") diff --git a/tests/utils.py b/tests/utils.py index 6ba5771d3d..1b81881470 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -40,7 +40,6 @@ "postgres", "snowflake", "filesystem", - "filesystem-sftp", "weaviate", "dummy", "motherduck", @@ -55,7 +54,6 @@ } NON_SQL_DESTINATIONS = { "filesystem", - "filesystem-sftp", "weaviate", "dummy", "qdrant", From 3ff56abed069d25d3b587cc47c77ec1c43a752f2 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Tue, 3 Sep 2024 19:48:07 +0200 Subject: [PATCH 14/41] feat: sftp file url parser --- 
dlt/common/storages/configuration.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dlt/common/storages/configuration.py b/dlt/common/storages/configuration.py index 1a0c0f5c4f..149e9729c6 100644 --- a/dlt/common/storages/configuration.py +++ b/dlt/common/storages/configuration.py @@ -57,6 +57,11 @@ class LoadStorageConfiguration(BaseConfiguration): ] +def _make_sftp_url(scheme: str, fs_path: str, bucket_url: str) -> str: + parsed_bucket_url = urlparse(bucket_url) + return f"{scheme}://{parsed_bucket_url.hostname}/{fs_path}" + + def _make_az_url(scheme: str, fs_path: str, bucket_url: str) -> str: parsed_bucket_url = urlparse(bucket_url) if parsed_bucket_url.username: @@ -81,7 +86,7 @@ def _make_file_url(scheme: str, fs_path: str, bucket_url: str) -> str: return p_.as_uri() -MAKE_URI_DISPATCH = {"az": _make_az_url, "file": _make_file_url} +MAKE_URI_DISPATCH = {"az": _make_az_url, "file": _make_file_url, "sftp": _make_sftp_url} MAKE_URI_DISPATCH["adl"] = MAKE_URI_DISPATCH["az"] MAKE_URI_DISPATCH["abfs"] = MAKE_URI_DISPATCH["az"] From 531f282e9497272fc890af50866840eafeca7c85 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Tue, 3 Sep 2024 19:48:50 +0200 Subject: [PATCH 15/41] test: sftp reading using file samples --- tests/load/filesystem_sftp/docker-compose.yml | 2 + .../filesystem_sftp/test_filesystem_sftp.py | 45 ++++++++++++++++--- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/tests/load/filesystem_sftp/docker-compose.yml b/tests/load/filesystem_sftp/docker-compose.yml index fc692ff616..078f22f432 100644 --- a/tests/load/filesystem_sftp/docker-compose.yml +++ b/tests/load/filesystem_sftp/docker-compose.yml @@ -11,6 +11,8 @@ services: - sftpserver ports: - "2222:22" + volumes: + - ../../common/storages/samples:/home/foo/sftp/data/samples networks: sftpserver: diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py 
index 9d9c8e1c0a..3506871145 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -4,17 +4,28 @@ import dlt from dlt.common.json import json +from dlt.common.configuration.inject import with_config +from dlt.common.storages import FilesystemConfiguration, fsspec_from_config +from dlt.common.storages.fsspec_filesystem import glob_files from dlt.destinations.impl.filesystem.filesystem import FilesystemClient +from tests.common.storages.utils import assert_sample_files -@pytest.fixture(scope="module") -def sftp_filesystem(): - # path to the private key + +@with_config(spec=FilesystemConfiguration, sections=("sources", "filesystem")) +def get_config(config: FilesystemConfiguration = None) -> FilesystemConfiguration: + return config + + +def get_key_path() -> str: current_dir = os.path.dirname(os.path.abspath(__file__)) - key_path = os.path.join(current_dir, "bootstrap/foo_rsa") + return os.path.join(current_dir, "bootstrap/foo_rsa") + +@pytest.fixture(scope="module") +def sftp_filesystem(): fs = fsspec.filesystem( - "sftp", host="localhost", port=2222, username="foo", key_filename=key_path + "sftp", host="localhost", port=2222, username="foo", key_filename=get_key_path() ) yield fs @@ -48,7 +59,7 @@ def test_filesystem_sftp_server(sftp_filesystem): fs.rm(test_file) -def test_filesystem_sftp_pipeline(sftp_filesystem): +def test_filesystem_sftp_write(sftp_filesystem): import posixpath import pyarrow.parquet as pq @@ -76,3 +87,25 @@ def states(): expected_states = ["DE", "AK", "CA"] assert sorted(result_states) == sorted(expected_states) + + +@pytest.mark.parametrize("load_content", (True, False)) +@pytest.mark.parametrize("glob_filter", ("**", "**/*.csv", "*.txt", "met_csv/A803/*.csv")) +def test_filesystem_sftp_read(load_content: bool, glob_filter: str) -> None: + # docker volume mount on: /home/foo/sftp/data/samples but /data/samples is the path in the SFTP server + 
os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path() + + config = get_config() + fs, _ = fsspec_from_config(config) + + files = fs.ls("/data/samples") + + assert len(files) > 0 + # use glob to get data + all_file_items = list(glob_files(fs, config.bucket_url, file_glob=glob_filter)) + + print(all_file_items) + assert_sample_files(all_file_items, fs, config, load_content, glob_filter) From 99157c9ace1894573f9c06a1602731f9bad3c1fb Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 12:48:48 +0200 Subject: [PATCH 16/41] chore: extended SFTP credentials class --- .../configuration/specs/sftp_crendentials.py | 75 ++++++++++++++----- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/dlt/common/configuration/specs/sftp_crendentials.py b/dlt/common/configuration/specs/sftp_crendentials.py index 4ff36efaa3..92f83cc438 100644 --- a/dlt/common/configuration/specs/sftp_crendentials.py +++ b/dlt/common/configuration/specs/sftp_crendentials.py @@ -1,32 +1,69 @@ from typing import Any, Dict, Optional -from dlt.common.typing import TSecretValue, TSecretStrValue, DictStrAny +from dlt.common.typing import TSecretStrValue, DictStrAny from dlt.common.configuration.specs.base_configuration import CredentialsConfiguration, configspec @configspec class SFTPCredentials(CredentialsConfiguration): - """Credentials for SFTP filesystem, compatible with fsspec/sftp""" + """Credentials for SFTP filesystem, compatible with fsspec SFTP protocol. 
- # TODO: separate config and secrets - sftp_port: str = None - sftp_username: str = None + Authentication is attempted in the following order of priority: + + - `key_filename` may contain OpenSSH public certificate paths + as well as regular private-key paths; when files ending in `-cert.pub` are found, they are assumed to match + a private key, and both components will be loaded. + + - Any key found through an SSH agent: any “id_rsa”, “id_dsa”, or “id_ecdsa” key discoverable in ~/.ssh/. + + - Plain username/password authentication, if a password was provided. + + - If a private key requires a password to unlock it, and a password is provided, that password will be used to + attempt to unlock the key. + + For more information about parameters: + https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect + """ + + sftp_port: Optional[int] = 22 + sftp_username: Optional[str] = None sftp_password: Optional[TSecretStrValue] = None - sftp_key_filename: Optional[TSecretStrValue] = None # path to the private key file - sftp_key_passphrase: Optional[TSecretStrValue] = None # passphrase for the private key + sftp_key_filename: Optional[str] = None + sftp_key_passphrase: Optional[TSecretStrValue] = None + sftp_timeout: Optional[float] = None + sftp_banner_timeout: Optional[float] = None + sftp_auth_timeout: Optional[float] = None + sftp_channel_timeout: Optional[float] = None + sftp_allow_agent: Optional[bool] = True + sftp_look_for_keys: Optional[bool] = True + sftp_compress: Optional[bool] = False + sftp_gss_auth: Optional[bool] = False + sftp_gss_kex: Optional[bool] = False + sftp_gss_deleg_creds: Optional[bool] = True + sftp_gss_host: Optional[str] = None + sftp_gss_trust_dns: Optional[bool] = True def to_fsspec_credentials(self) -> Dict[str, Any]: - """Return a dict that can be passed to fsspec/sftp""" - - # fsspec/sftp (ssh_args) args: - # - hostname, port=22, username=None, password=None, pkey=None, key_filename=None, timeout=None, 
allow_agent=True, look_for_keys=True, compress=False, sock=None, gss_auth=False, gss_kex=False, gss_deleg_creds=True, gss_host=None, banner_timeout=None, auth_timeout=None, channel_timeout=None, gss_trust_dns=True, passphrase=None, disabled_algorithms=None, transport_factory=None, auth_strategy=None - # link: https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect - credentials: DictStrAny = dict( - port=self.sftp_port, - username=self.sftp_username, - password=self.sftp_password, - key_filename=self.sftp_key_filename, - passphrase=self.sftp_key_passphrase, - ) + """Return a dict that can be passed to fsspec SFTP/SSHClient.connect method.""" + + credentials: Dict[str, Any] = { + "port": self.sftp_port, + "username": self.sftp_username, + "password": self.sftp_password, + "key_filename": self.sftp_key_filename, + "passphrase": self.sftp_key_passphrase, + "timeout": self.sftp_timeout, + "banner_timeout": self.sftp_banner_timeout, + "auth_timeout": self.sftp_auth_timeout, + "channel_timeout": self.sftp_channel_timeout, + "allow_agent": self.sftp_allow_agent, + "look_for_keys": self.sftp_look_for_keys, + "compress": self.sftp_compress, + "gss_auth": self.sftp_gss_auth, + "gss_kex": self.sftp_gss_kex, + "gss_deleg_creds": self.sftp_gss_deleg_creds, + "gss_host": self.sftp_gss_host, + "gss_trust_dns": self.sftp_gss_trust_dns, + } return credentials From 733bee2cf4dc213221910697269292ba46f6c717 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:03:22 +0200 Subject: [PATCH 17/41] docs: filesystem SFTP credentials and authentication --- .../dlt-ecosystem/destinations/filesystem.md | 90 ++++++++++++++++++- 1 file changed, 87 insertions(+), 3 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md index 03f6a86337..40aba4517b 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md 
+++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md @@ -260,13 +260,97 @@ bucket_url='\\?\UNC\localhost\c$\a\b\c' ::: ### SFTP +Run `pip install "dlt[paramiko]` which will install the `paramiko` package alongside `dlt`, enabling secure SFTP transfers. -TODO: add explanation and examples +Configure your SFTP credentials by editing the `.dlt/secrets.toml` file. By default, the file contains placeholders for AWS credentials. You should replace these with your SFTP credentials. -```sh -pip install paramiko +Below are the possible fields for SFTP credentials configuration: + +```text +sftp_port # The port for SFTP, defaults to 22 (standard for SSH/SFTP) +sftp_username # Your SFTP username, defaults to None +sftp_password # Your SFTP password (if using password-based auth), defaults to None +sftp_key_filename # Path to your private key file for key-based authentication, defaults to None +sftp_key_passphrase # Passphrase for your private key (if applicable), defaults to None +sftp_timeout # Timeout for establishing a connection, defaults to None +sftp_banner_timeout # Timeout for receiving the banner during authentication, defaults to None +sftp_auth_timeout # Authentication timeout, defaults to None +sftp_channel_timeout # Channel timeout for SFTP operations, defaults to None +sftp_allow_agent # Use SSH agent for key management (if available), defaults to True +sftp_look_for_keys # Search for SSH keys in the default SSH directory (~/.ssh/), defaults to True +sftp_compress # Enable compression (can improve performance over slow networks), defaults to False +sftp_gss_auth # Use GSS-API for authentication, defaults to False +sftp_gss_kex # Use GSS-API for key exchange, defaults to False +sftp_gss_deleg_creds # Delegate credentials with GSS-API, defaults to True +sftp_gss_host # Host for GSS-API, defaults to None +sftp_gss_trust_dns # Trust DNS for GSS-API, defaults to True +``` +> For more information about credentials parameters: 
https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect + +### Authentication Methods + +SFTP authentication is attempted in the following order of priority: + +1. **Key-based authentication**: If you provide a `key_filename` containing the path to a private key or a corresponding OpenSSH public certificate (e.g., `id_rsa` and `id_rsa-cert.pub`), these will be used for authentication. If the private key requires a passphrase, you can specify it via `sftp_key_passphrase`. If your private key requires a passphrase to unlock, and you’ve provided one, it will be used to attempt to unlock the key. + +2. **SSH Agent-based authentication**: If `allow_agent=True` (default), Paramiko will look for any SSH keys stored in your local SSH agent (such as `id_rsa`, `id_dsa`, or `id_ecdsa` keys stored in `~/.ssh/`). + +3. **Username/Password authentication**: If a password is provided (`sftp_password`), plain username/password authentication will be attempted. + +4. **GSS-API authentication**: If GSS-API (Kerberos) is enabled (sftp_gss_auth=True), authentication will use the Kerberos protocol. GSS-API may also be used for key exchange (sftp_gss_kex=True) and credential delegation (sftp_gss_deleg_creds=True). This method is useful in environments where Kerberos is set up, often in enterprise networks. + + +#### 1. **Key-based Authentication** + +If you use an SSH key instead of a password, you can specify the path to your private key in the configuration. + +```toml +[destination.filesystem] +bucket_url = "sftp://[hostname]/[path]" +file_glob = "*" + +[destination.filesystem.credentials] +sftp_username = "foo" +sftp_key_filename = "/path/to/id_rsa" # Replace with the path to your private key file +sftp_key_passphrase = "your_passphrase" # Optional: passphrase for your private key +``` + +#### 2. **SSH Agent-based Authentication** + +If you have an SSH agent running with loaded keys, you can allow Paramiko to use these keys automatically. 
You can omit the password and key fields if you're relying on the SSH agent. + +```toml +[destination.filesystem] +bucket_url = "sftp://[hostname]/[path]" +file_glob = "*" + +[destination.filesystem.credentials] +sftp_username = "foo" +sftp_allow_agent = true # Enable SSH agent usage +sftp_look_for_keys = true # Allow searching for keys in ~/.ssh/ +``` + +#### 3. **Username/Password Authentication** + +This is the simplest form of authentication, where you supply a username and password directly. + +```toml +[destination.filesystem] +bucket_url = "sftp://[hostname]/[path]" # The hostname of your SFTP server and the remote path +file_glob = "*" # Pattern to match the files you want to upload/download + +[destination.filesystem.credentials] +sftp_username = "foo" # Replace "foo" with your SFTP username +sftp_password = "pass" # Replace "pass" with your SFTP password ``` + +### Notes: +- **Key-based Authentication**: Make sure your private key has the correct permissions (`chmod 600`), or SSH will refuse to use it. +- **Timeouts**: It's important to adjust timeout values based on your network conditions to avoid connection issues. + +This configuration allows flexible SFTP authentication, whether you're using passwords, keys, or agents, and ensures secure communication between your local environment and the SFTP server. 
+ ## Write disposition The filesystem destination handles the write dispositions as follows: - `append` - files belonging to such tables are added to the dataset folder From 8f538329e1c1867f377c30d4d7bf37422b202c42 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 16:56:51 +0200 Subject: [PATCH 18/41] chore: add bobby password protected key-based authentication --- .../load/filesystem_sftp/bootstrap/Dockerfile | 41 ++++++++++++--- .../load/filesystem_sftp/bootstrap/bobby_rsa | 50 +++++++++++++++++++ .../filesystem_sftp/bootstrap/bobby_rsa.pub | 1 + 3 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 tests/load/filesystem_sftp/bootstrap/bobby_rsa create mode 100644 tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub diff --git a/tests/load/filesystem_sftp/bootstrap/Dockerfile b/tests/load/filesystem_sftp/bootstrap/Dockerfile index a04027cbac..b4966bfa46 100644 --- a/tests/load/filesystem_sftp/bootstrap/Dockerfile +++ b/tests/load/filesystem_sftp/bootstrap/Dockerfile @@ -2,33 +2,32 @@ FROM ubuntu:latest ENV DEBIAN_FRONTEND=noninteractive -# update packages and install openssh server RUN apt-get update && \ apt-get install -y openssh-server && \ rm -rf /var/lib/apt/lists/* -# set up user for SFTP with no shell login +# USER foo: set up user for SFTP with no shell login RUN useradd -m -d /home/foo -s /usr/sbin/nologin foo && \ mkdir -p /home/foo/.ssh && \ chown foo:foo /home/foo/.ssh && \ chmod 700 /home/foo/.ssh -# set password for the user foo +# USER foo: set password for the user foo RUN echo 'foo:pass' | chpasswd -# copy the public key and set permissions +# USER foo: copy the public key and set permissions COPY foo_rsa.pub /home/foo/.ssh/authorized_keys RUN chmod 600 /home/foo/.ssh/authorized_keys && \ chown foo:foo /home/foo/.ssh/authorized_keys -# create a directory for SFTP that the user will have access to +# USER foo: create a directory for SFTP that the user will have access to 
RUN mkdir -p /home/foo/sftp/data && \ chown root:root /home/foo /home/foo/sftp && \ chmod 755 /home/foo /home/foo/sftp && \ chown foo:foo /home/foo/sftp/data && \ chmod 755 /home/foo/sftp/data -# configure SSH for SFTP: allow password and pubkey authentication +# USER foo: configure SSH for SFTP: allow password and pubkey authentication RUN mkdir -p /run/sshd && \ echo "Match User foo" >> /etc/ssh/sshd_config && \ echo " ChrootDirectory /home/foo/sftp" >> /etc/ssh/sshd_config && \ @@ -40,6 +39,36 @@ RUN mkdir -p /run/sshd && \ echo " AllowTcpForwarding no" >> /etc/ssh/sshd_config && \ echo " X11Forwarding no" >> /etc/ssh/sshd_config +# USER bobby: set up user for SFTP with no shell login +RUN useradd -m -d /home/bobby -s /usr/sbin/nologin bobby && \ + mkdir -p /home/bobby/.ssh && \ + chown bobby:bobby /home/bobby/.ssh && \ + chmod 700 /home/bobby/.ssh + +# USER bobby: copy the public key and set permissions +COPY bobby_rsa.pub /home/bobby/.ssh/authorized_keys +RUN chmod 600 /home/bobby/.ssh/authorized_keys && \ + chown bobby:bobby /home/bobby/.ssh/authorized_keys + +# USER bobby: create a directory for SFTP that the user will have access to +RUN mkdir -p /home/bobby/sftp/data && \ + chown root:root /home/bobby /home/bobby/sftp && \ + chmod 755 /home/bobby /home/bobby/sftp && \ + chown bobby:bobby /home/bobby/sftp/data && \ + chmod 755 /home/bobby/sftp/data + +# USER bobby: configure SSH for SFTP: allow password and pubkey authentication +RUN mkdir -p /run/sshd && \ + echo "Match User bobby" >> /etc/ssh/sshd_config && \ + echo " ChrootDirectory /home/bobby/sftp" >> /etc/ssh/sshd_config && \ + echo " ForceCommand internal-sftp" >> /etc/ssh/sshd_config && \ + echo " PasswordAuthentication no" >> /etc/ssh/sshd_config && \ + echo " PubkeyAuthentication yes" >> /etc/ssh/sshd_config && \ + echo " PermitTunnel no" >> /etc/ssh/sshd_config && \ + echo " AllowAgentForwarding no" >> /etc/ssh/sshd_config && \ + echo " AllowTcpForwarding no" >> /etc/ssh/sshd_config && \ + echo " 
X11Forwarding no" >> /etc/ssh/sshd_config + EXPOSE 22 # run sshd on container start diff --git a/tests/load/filesystem_sftp/bootstrap/bobby_rsa b/tests/load/filesystem_sftp/bootstrap/bobby_rsa new file mode 100644 index 0000000000..1ee49f5776 --- /dev/null +++ b/tests/load/filesystem_sftp/bootstrap/bobby_rsa @@ -0,0 +1,50 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAACmFlczI1Ni1jdHIAAAAGYmNyeXB0AAAAGAAAABB98Gk5Ak +IB7cgFibVwRnadAAAAGAAAAAEAAAIXAAAAB3NzaC1yc2EAAAADAQABAAACAQD2sf6SyXup +5tWG/cqSTtORJJGM1teEnDpyupHbUP1LHXvq5nHJ0g82YX9v3wUJ3Nkd6ZMbh+bs37BCaQ +zR2oDWs3kIhsx/xesg03N6tdVMpXFw3tgSceSsXe31YS5rUVNGYIUVtcP3xFRQvLG41X0/ +5GEGS+yiicq+LfqyMnE6Np+/FGaCFzuW1nE8hrR4B/YoLKJs8e4kKOs8Jsj8mCCAXvAk+S +NDZLbVhx5pQcybUSNYqC4NExn5EEj4xK71vHa4oA3gtgeubKXVr04fk5C8maku/tr1P63M +UKtuvTtZBmNH+RVs9i9IwsF8VSMBSMyTMuOaa1Tsa3FIwnGZpZ1PcfTBG7CJPMu4drMBSw +5y0DiAY8mdGvdklz1esG0DldAZx5Blfvw6XU16588e3zQKNiApNwQHlvJJEb0Gb7rFw/Tt +HhCT8MtpBqIf+gRMaFhajbgkQI6CDABp6RgmpqcI6xDFwvkQ+JFveFNFy6zXVrd+oYgz16 +owDpEzfxTYp0mrhivpa0dkoKqgjIU8VMuQfegoxQiJUi+mW9IqrmzcGwAlRTzBcT+IJ9Jz +mfZxuZQshWIQe05nPMqCJqpSukg6tN+Fl0kVF8swz6pNjEIcAryIuHlqOxSyQz8pqxUPBS +PqAEcY0i+0VeMfweQ2TNHdtT3IYzOZrT79r/Ba6Jhy6wAAB1DPvTW0+gfDxLsGAokxLfHY +YtTqqu6siXxI1YPVS57m3HV3iL4PZ3yCcofMFjHT3R+QfSlJxmjg4cHLKCe2P4h10fMxSa +1T+H/NHJvh4ZXt2PfCGXKcXl65t/d5KnM1LSCWHxAXH+u8gOL8giRXljzzjjEl2xFchiYn +zf+pGaCBgfbd1232lePkeKHQSyR6gSOe5t15BwUVRoOKXIrFgpyf+a5i4sbOqBr464wcFd +cLrFcXMmtArNzWLrtBfyFgtyv2KqcdQ7PhUh4JZoNXKSI+28P8sj8xoe5PCCUnk9JlHsEe +j09mzPSqrfHm6JcEuuR/685hhTvlB3Wo0q0dc0AHIDNHQjL0WmKyDWGc321A+QquJdhkYb +v/BuDzrSuSA4tTrMfS84LNFvDtB0NtxfCz/doqiqTaMHre1DrA348Cvzrs53SQjiiT8PUC +SI5mO6u+XY5wyi80knGhJdOYZSwB/m8BuGZJlzRrR4hkbCBtrnwVd/R7jlIdobqJm5Y4+i +iocCGCAvEsKUz5RYZdJiCC85PhsUOtIdFeaCltrTNgmpAKO0GQdI4mYRUOrcXoaM6Y/eMi +zkbcDeitvGcT2YYkflmwxg/7G46sRbICgf8s+lcRix0P6grSvAINGdNBE3yz9YfeNKJh43 +BSQQxulk7ZM0juNqbMl/gGBikncsnx/aVusBzqo3jxZ0g2X2nn1rS0So13Fc54I1957YME 
+L0u3xY9nttkCY+TC5q+DuwjRZnu9uUe7qPSNpcI4L7SnU6YpW8qdkYf00Lp/CVAks8YjVx +1b0+FiM9CIYAQwQEcEGCHhi24IYwVLvvL+sXZZYRubgCOy74bxJKDZ173SJySvXdjz2d89 +gasbagNdvLP50WPm+b3KaCyUwhyHez0r23LnCAHfHi3MRO3FJPQr9/F5XsHDdpChWKngdq +juKQNtTuqwXlaSr+M8Sr7IMnnpzBeqsfldT223+kwmeWdmvXeR/Xcfw+8pexn2u8GL2jW+ +T5P7zaosKXHQwVUxOOvwQkzo7ldE3Dwj+SC//OfZtMWampuX2xoYbqxQEkAUGO+8g/FcA0 +E3TU4LlhbUbLZj2EA2nZSb78P9N9Uf3OzP9Cjl/g8B6jpDv/jzwXugONMZvANle8mwq4PV +2hRsdo/Y3IQ5eBO2z1sONbN0oluvdfwNVyS6kWIDT/EVEseeNUTKr0EsgoKNswxw/ZeTdT +fTqV+JSmiJSyy3pBaBCDs6w6Xo2wG1AZI7ggtb5zf/QDQqCgL1cvfneQx6de9aFmozt+Bu +DlmBchWcuSEZMrWaYsSuPNy2DgSMZE1UyTbnHwl8fC9N0etwvTD+C2L19BIUt+AsIUZ08R +WmBEYY5aeAlmZnNSVgakHQpyD2EY/jB89l8n2yK74Ortc7e1qbJtEn5KOd5LywjI1N+JLT +M3yUvz6+TFvhHi/mmgb05tcz60QR9CVl61zliVlV+TdP3smmypBjLhxa7a/FmRLGuSU0V6 +mOvpmALMrfRQRR3V5Mkq4kJsR6khBXwljhwtXveT67o/A9HygmAfG7+nAgPFHlCkHzPB28 +HAXwguiSrfUpyIzCTMvVE1mpt9dLN9RYtD1rcVfGBwUmZmSz1jP36pJZi1/84/8TeL3wjK +iJkJZ+UFhMkz+E2m25e/eHmeOmfxHCvPKqQAIzQLC/yR1wkp3hZIhk/6/ImJmGtjoS7/O3 +I3c998HSjQgP2OE6spr7J2YuLfUuz4QOFTs757pzgnfQNIrxioqmLb8dOGCfYhDeW5cEz8 +CE9piaoXXSFpQCsBwer+KFiyOdvNIeNDQBoruElS4KTO/qpYmDkOqzCywpUQCvmx6umqFd +5XaibupiP87chS68oyR3Fs/Ga+qsZ94CFi+6WYTT4GRug5flGtfZP5BeRN4O5AtICr/1pR +7tbw7lgwI1PN1G6jxMSrmstIjOA2vn9KxeRxpPLw+I5SjBZsyfjzjoCsZqTy9gvpnxf5Cu +Il/vgIZXNh3WC8Ypl7zvXsesiISd/7EiHPtncDy9dPk0XMVMfMUxTzWY7tle/xma8atRv7 +vf8w/XfP192OmpYf19+YwNjjUOksuT3WC50NbYAtYasIqJQtxC+2XgaCbf2bheZ01HBEO5 +qOqB+6o2KPz2cxjZIxRsyeX75VMETjPCQvYv9DIMRN4UP14VJEbLbVFDB9HOYpCirJrUC8 +NjUDdlTouyzx2F5Bw4mDnBOKRYuDtIwh7FtkDDCP7Fu9bIzeK+8Tpjn/WkIv600rKmrnZ9 ++a2Fy2LKIKJjyx+oISFiE52kE+rUVW/KKajHdxVZVbdOoHXzst5ZINFJ6uVaPDcyErsIoq +bbsRY8B3WwaU8Qa4AKUiqfTahZ22ducEyiDYs1apLZuE1HWKORXgWPMtdLPAE7+E/zo5ss +d7g+ddymwCBEV6q7p6Nw2f7HlTO++cvkB0EQDmrpWEcoRkSELTBQ6/SND90CX4yo2Rh6qq +Qi1l9MV2/ft4KKZsTqSlVV+po= +-----END OPENSSH PRIVATE KEY----- diff --git a/tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub b/tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub new file mode 
100644 index 0000000000..1f9ef6f504 --- /dev/null +++ b/tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub @@ -0,0 +1 @@ +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQD2sf6SyXup5tWG/cqSTtORJJGM1teEnDpyupHbUP1LHXvq5nHJ0g82YX9v3wUJ3Nkd6ZMbh+bs37BCaQzR2oDWs3kIhsx/xesg03N6tdVMpXFw3tgSceSsXe31YS5rUVNGYIUVtcP3xFRQvLG41X0/5GEGS+yiicq+LfqyMnE6Np+/FGaCFzuW1nE8hrR4B/YoLKJs8e4kKOs8Jsj8mCCAXvAk+SNDZLbVhx5pQcybUSNYqC4NExn5EEj4xK71vHa4oA3gtgeubKXVr04fk5C8maku/tr1P63MUKtuvTtZBmNH+RVs9i9IwsF8VSMBSMyTMuOaa1Tsa3FIwnGZpZ1PcfTBG7CJPMu4drMBSw5y0DiAY8mdGvdklz1esG0DldAZx5Blfvw6XU16588e3zQKNiApNwQHlvJJEb0Gb7rFw/TtHhCT8MtpBqIf+gRMaFhajbgkQI6CDABp6RgmpqcI6xDFwvkQ+JFveFNFy6zXVrd+oYgz16owDpEzfxTYp0mrhivpa0dkoKqgjIU8VMuQfegoxQiJUi+mW9IqrmzcGwAlRTzBcT+IJ9JzmfZxuZQshWIQe05nPMqCJqpSukg6tN+Fl0kVF8swz6pNjEIcAryIuHlqOxSyQz8pqxUPBSPqAEcY0i+0VeMfweQ2TNHdtT3IYzOZrT79r/Ba6Jhy6w== bobby@example.com From 706e5cd02cef74781a04241726dd401d754cd746 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 16:57:38 +0200 Subject: [PATCH 19/41] docs: sftp correction for ssh-agent --- docs/website/docs/dlt-ecosystem/destinations/filesystem.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md index 40aba4517b..dbe09e45ba 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md @@ -326,9 +326,9 @@ file_glob = "*" [destination.filesystem.credentials] sftp_username = "foo" -sftp_allow_agent = true # Enable SSH agent usage -sftp_look_for_keys = true # Allow searching for keys in ~/.ssh/ +sftp_key_passphrase = "your_passphrase" # Optional: passphrase for your private key ``` +The loaded key must be one of the following types stored in ~/.ssh/: id_rsa, id_dsa, or id_ecdsa. #### 3. 
**Username/Password Authentication** From 5ba68203f53116b0e5b192e13dca06ba1dde6a2c Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 17:15:23 +0200 Subject: [PATCH 20/41] chore: add docker volume --- tests/load/filesystem_sftp/docker-compose.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/load/filesystem_sftp/docker-compose.yml b/tests/load/filesystem_sftp/docker-compose.yml index 078f22f432..d9e992a8cd 100644 --- a/tests/load/filesystem_sftp/docker-compose.yml +++ b/tests/load/filesystem_sftp/docker-compose.yml @@ -12,7 +12,8 @@ services: ports: - "2222:22" volumes: - - ../../common/storages/samples:/home/foo/sftp/data/samples + - ../../common/storages/samples:/home/foo/sftp/data/samples + - ../../common/storages/samples:/home/bobby/sftp/data/samples networks: sftpserver: From eefaafe7d1ac570a6251acee9c8fd331dc550ea1 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 17:16:10 +0200 Subject: [PATCH 21/41] chore: revert ci changes --- .github/workflows/test_local_destinations.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/test_local_destinations.yml b/.github/workflows/test_local_destinations.yml index bf159afeb0..d09397a16e 100644 --- a/.github/workflows/test_local_destinations.yml +++ b/.github/workflows/test_local_destinations.yml @@ -74,9 +74,6 @@ jobs: - name: Start weaviate run: docker compose -f ".github/weaviate-compose.yml" up -d - - - name: Start SFTP server - run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d - name: Setup Python uses: actions/setup-python@v4 @@ -113,7 +110,4 @@ jobs: - name: Stop weaviate if: always() run: docker compose -f ".github/weaviate-compose.yml" down -v - - - name: Stop SFTP server - if: always() - run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" down -v + \ No newline at end of file 
From a3833abdceb2c6c782679965ff6d47e3318e81de Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 17:19:04 +0200 Subject: [PATCH 22/41] test: refactor sftp with auth methods --- .github/workflows/test_destination_sftp.yml | 83 +++++++++++++++++++ pytest.ini | 3 +- .../filesystem_sftp/test_filesystem_sftp.py | 70 +++++++++++++++- 3 files changed, 153 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/test_destination_sftp.yml diff --git a/.github/workflows/test_destination_sftp.yml b/.github/workflows/test_destination_sftp.yml new file mode 100644 index 0000000000..27f7c602e1 --- /dev/null +++ b/.github/workflows/test_destination_sftp.yml @@ -0,0 +1,83 @@ +name: dest | sftp filesystem + +on: + pull_request: + branches: + - master + - devel + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + # NOTE: this workflow can't use github secrets! 
+ # DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} + + RUNTIME__SENTRY_DSN: https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752 + RUNTIME__LOG_LEVEL: ERROR + RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }} + ACTIVE_DESTINATIONS: "[\"filesystem\"]" + ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\"]" + +jobs: + get_docs_changes: + name: docs changes + uses: ./.github/workflows/get_docs_changes.yml + + run_loader: + name: dest | sftp filesystem + needs: get_docs_changes + if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' + strategy: + fail-fast: false + defaults: + run: + shell: bash + runs-on: "ubuntu-latest" + + steps: + - name: Check out + uses: actions/checkout@master + + - name: Start SFTP server + run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d + + - name: Configure local SSH agent for tests + run: | + eval "$(ssh-agent -s)" + cp tests/load/filesystem_sftp/bootstrap/bobby_rsa ~/.ssh/id_rsa + cp tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub ~/.ssh/id_rsa.pub + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.10.x" + + - name: Install Poetry + uses: snok/install-poetry@v1.3.2 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp + + - name: Install dependencies + run: poetry install --no-interaction -E filesystem --with sentry-sdk --with pipeline + + - name: create secrets.toml + run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml + + - run: poetry run pytest tests/load -m "sftp" + name: Run SFTP Linux tests + + - name: Stop SFTP server + if: always() + run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" down -v diff 
--git a/pytest.ini b/pytest.ini index 1d4e0df6dc..d6e85f8863 100644 --- a/pytest.ini +++ b/pytest.ini @@ -11,4 +11,5 @@ filterwarnings= ignore::DeprecationWarning markers = essential: marks all essential tests no_load: marks tests that do not load anything - needspyarrow17: marks tests that need pyarrow>=17.0.0 (deselected by default) \ No newline at end of file + needspyarrow17: marks tests that need pyarrow>=17.0.0 (deselected by default) + sftp: marks all sftp tests \ No newline at end of file diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index 3506871145..622d0db3bc 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -17,9 +17,9 @@ def get_config(config: FilesystemConfiguration = None) -> FilesystemConfiguratio return config -def get_key_path() -> str: +def get_key_path(user: str = "foo") -> str: current_dir = os.path.dirname(os.path.abspath(__file__)) - return os.path.join(current_dir, "bootstrap/foo_rsa") + return os.path.join(current_dir, f"bootstrap/{user}_rsa") @pytest.fixture(scope="module") @@ -30,6 +30,7 @@ def sftp_filesystem(): yield fs +@pytest.mark.sftp def test_filesystem_sftp_server(sftp_filesystem): test_file = "/data/countries.json" input_data = { @@ -59,6 +60,7 @@ def test_filesystem_sftp_server(sftp_filesystem): fs.rm(test_file) +@pytest.mark.sftp def test_filesystem_sftp_write(sftp_filesystem): import posixpath import pyarrow.parquet as pq @@ -89,6 +91,7 @@ def states(): assert sorted(result_states) == sorted(expected_states) +@pytest.mark.sftp @pytest.mark.parametrize("load_content", (True, False)) @pytest.mark.parametrize("glob_filter", ("**", "**/*.csv", "*.txt", "met_csv/A803/*.csv")) def test_filesystem_sftp_read(load_content: bool, glob_filter: str) -> None: @@ -109,3 +112,66 @@ def test_filesystem_sftp_read(load_content: bool, glob_filter: str) -> None: print(all_file_items) 
assert_sample_files(all_file_items, fs, config, load_content, glob_filter) + + +@pytest.mark.sftp +def test_filesystem_sftp_auth_useranme_password(): + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "pass" + + config = get_config() + fs, _ = fsspec_from_config(config) + + files = fs.ls("/data/samples") + assert len(files) > 0 + + +@pytest.mark.sftp +def test_filesystem_sftp_auth_private_key(): + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path() + + config = get_config() + fs, _ = fsspec_from_config(config) + + files = fs.ls("/data/samples") + + assert len(files) > 0 + + +@pytest.mark.sftp +def test_filesystem_sftp_auth_private_key_protected(): + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "bobby" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path("bobby") + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_PASSPHRASE"] = "passphrase123" + + config = get_config() + fs, _ = fsspec_from_config(config) + + files = fs.ls("/data/samples") + + assert len(files) > 0 + + +# Test requires - ssh_agent with user's bobby key loaded. 
The commands required are: +# eval "$(ssh-agent -s)" +# cp /path/to/tests/load/filesystem_sftp/bobby_rsa* ~/.ssh/id_rsa +@pytest.mark.sftp +def test_filesystem_sftp_auth_private_ssh_agent(): + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://0.0.0.0/data/samples" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "bobby" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "passphrase123" + + config = get_config() + fs, _ = fsspec_from_config(config) + + files = fs.ls("/data/samples") + + assert len(files) > 0 From 6f3c56a519c90a74a7b3d387c5a7c01cc2ad9129 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 17:40:47 +0200 Subject: [PATCH 23/41] test: sftp skip test when agent not configured --- .../filesystem_sftp/test_filesystem_sftp.py | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index 622d0db3bc..e7551fc712 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -22,6 +22,34 @@ def get_key_path(user: str = "foo") -> str: return os.path.join(current_dir, f"bootstrap/{user}_rsa") +def files_are_equal(file1_path, file2_path): + try: + with open(file1_path, "r", encoding="utf-8") as f1, open( + file2_path, "r", encoding="utf-8" + ) as f2: + return f1.read() == f2.read() + except FileNotFoundError: + return False + + +def is_ssh_agent_ready(): + try: + # Check if SSH agent is running + ssh_agent_pid = os.getenv("SSH_AGENT_PID") + if not ssh_agent_pid: + return False + + # Check if the key is present and matches + id_rsa_pub_path = os.path.expanduser("~/.ssh/id_rsa") + bobby_rsa_pub_path = os.path.expanduser(get_key_path("bobby")) + if not os.path.isfile(id_rsa_pub_path) or not 
os.path.isfile(bobby_rsa_pub_path): + return False + + return files_are_equal(id_rsa_pub_path, bobby_rsa_pub_path) + except Exception: + return False + + @pytest.fixture(scope="module") def sftp_filesystem(): fs = fsspec.filesystem( @@ -163,8 +191,12 @@ def test_filesystem_sftp_auth_private_key_protected(): # eval "$(ssh-agent -s)" # cp /path/to/tests/load/filesystem_sftp/bobby_rsa* ~/.ssh/id_rsa @pytest.mark.sftp +@pytest.mark.skipif( + not is_ssh_agent_ready(), + reason="SSH agent is not running or bobby's private key isn't stored in ~/.ssh/id_rsa", +) def test_filesystem_sftp_auth_private_ssh_agent(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://0.0.0.0/data/samples" + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "bobby" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "passphrase123" From d9c6f922721cc8d131886f4946f84c39c4833d5b Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 17:56:26 +0200 Subject: [PATCH 24/41] fix: poetry lock --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 77e3311df8..e0156dec3c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -9809,4 +9809,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "6f785bc3b710c8752952e233ed848df5aa0cd7edbee73dcfe8fdf045b506cac7" +content-hash = "0a6a967487e09eba41371f40abd2ced88c83621d82320e09847ddb60a2fe3a4c" From f16fff871a1e45d182b2387e4cf113f08a1fa440 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:03:56 +0200 Subject: [PATCH 25/41] fix: github workflow --- .github/workflows/test_destination_sftp.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff 
--git a/.github/workflows/test_destination_sftp.yml b/.github/workflows/test_destination_sftp.yml index 27f7c602e1..8bf83cff97 100644 --- a/.github/workflows/test_destination_sftp.yml +++ b/.github/workflows/test_destination_sftp.yml @@ -46,9 +46,10 @@ jobs: - name: Configure local SSH agent for tests run: | + mkdir -p /home/runner/.ssh + cp tests/load/filesystem_sftp/bootstrap/bobby_rsa /home/runner/.ssh/id_rsa + cp tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub /home/runner/.ssh/id_rsa.pub eval "$(ssh-agent -s)" - cp tests/load/filesystem_sftp/bootstrap/bobby_rsa ~/.ssh/id_rsa - cp tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub ~/.ssh/id_rsa.pub - name: Setup Python uses: actions/setup-python@v4 From a94e525960e8c9ce655738ddba11dfc5104df860 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:09:37 +0200 Subject: [PATCH 26/41] fix: run only sftp tests --- .github/workflows/test_destination_sftp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_destination_sftp.yml b/.github/workflows/test_destination_sftp.yml index 8bf83cff97..35a9cc74ba 100644 --- a/.github/workflows/test_destination_sftp.yml +++ b/.github/workflows/test_destination_sftp.yml @@ -76,7 +76,7 @@ jobs: - name: create secrets.toml run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml - - run: poetry run pytest tests/load -m "sftp" + - run: poetry run pytest tests/load/filesystem_sftp -m "sftp" name: Run SFTP Linux tests - name: Stop SFTP server From a8ad274023ff717f488635367fc628fd720555d5 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:15:57 +0200 Subject: [PATCH 27/41] fix: merge conflict regression --- dlt/common/storages/fsspec_filesystem.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index 516588b656..6ac5f31007 
100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -139,6 +139,10 @@ def prepare_fsspec_args(config: FilesystemConfiguration) -> DictStrAny: register_implementation("gdrive", GoogleDriveFileSystem, "GoogleDriveFileSystem") fs_kwargs.update(DEFAULT_KWARGS.get(protocol, {})) + + if protocol == "sftp": + fs_kwargs.clear() + if config.kwargs is not None: fs_kwargs.update(config.kwargs) if config.client_kwargs is not None: From 4123a171b7f81cb4693ca3b4d9c5038bf27bf7e2 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:25:19 +0200 Subject: [PATCH 28/41] fix: ssh-agent for tests --- .github/workflows/test_destination_sftp.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_destination_sftp.yml b/.github/workflows/test_destination_sftp.yml index 35a9cc74ba..6eb9e63975 100644 --- a/.github/workflows/test_destination_sftp.yml +++ b/.github/workflows/test_destination_sftp.yml @@ -76,8 +76,12 @@ jobs: - name: create secrets.toml run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml - - run: poetry run pytest tests/load/filesystem_sftp -m "sftp" - name: Run SFTP Linux tests + - name: Run SFTP Linux tests + run: | + eval "$(ssh-agent -s)" + ls -lha /home/runner/.ssh + poetry run pytest tests/load/filesystem_sftp -m "sftp" + - name: Stop SFTP server if: always() From f4352eb13c5793356bca0ce9cb6aafb38bb6bf6c Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Fri, 6 Sep 2024 16:34:24 +0200 Subject: [PATCH 29/41] fix: pytest executions excluding sftp --- .github/workflows/test_destinations.yml | 2 +- .github/workflows/test_local_destinations.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_destinations.yml b/.github/workflows/test_destinations.yml index 7fae69ff9e..2fdb315893 100644 --- 
a/.github/workflows/test_destinations.yml +++ b/.github/workflows/test_destinations.yml @@ -87,6 +87,6 @@ jobs: if: ${{ ! (contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule')}} - run: | - poetry run pytest tests/load + poetry run pytest tests/load -m "not sftp" name: Run all tests Linux if: ${{ contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule'}} diff --git a/.github/workflows/test_local_destinations.yml b/.github/workflows/test_local_destinations.yml index d09397a16e..f619653b81 100644 --- a/.github/workflows/test_local_destinations.yml +++ b/.github/workflows/test_local_destinations.yml @@ -101,7 +101,7 @@ jobs: run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml # always run full suite, also on branches - - run: poetry run pytest tests/load && poetry run pytest tests/cli + - run: poetry run pytest tests/load -m "not sftp" && poetry run pytest tests/cli name: Run tests Linux env: DESTINATION__POSTGRES__CREDENTIALS: postgresql://loader:loader@localhost:5432/dlt_data From de96bd19d0138636c6bbd8305b26f701c08ccfb0 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Mon, 9 Sep 2024 13:14:10 +0200 Subject: [PATCH 30/41] fix: CI test execution --- tests/load/filesystem_sftp/test_filesystem_sftp.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index e7551fc712..f8274751a8 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -34,6 +34,10 @@ def files_are_equal(file1_path, file2_path): def is_ssh_agent_ready(): try: + # Never skip tests when running in CI + if os.getenv("CI"): + return True + # Check if SSH agent is running ssh_agent_pid = os.getenv("SSH_AGENT_PID") if not ssh_agent_pid: @@ -190,6 +194,7 @@ def 
test_filesystem_sftp_auth_private_key_protected(): # Test requires - ssh_agent with user's bobby key loaded. The commands required are: # eval "$(ssh-agent -s)" # cp /path/to/tests/load/filesystem_sftp/bobby_rsa* ~/.ssh/id_rsa +# cp /path/to/tests/load/filesystem_sftp/bobby_rsa.pub ~/.ssh/id_rsa.pub @pytest.mark.sftp @pytest.mark.skipif( not is_ssh_agent_ready(), From 69d7e9b08d3368de42e13ae0397cba39d172af8b Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:32:34 +0200 Subject: [PATCH 31/41] test: sftp login with signed certificate --- .../load/filesystem_sftp/bootstrap/Dockerfile | 53 +++++++++++++++++-- tests/load/filesystem_sftp/bootstrap/SETUP.md | 49 +++++++++++++++++ .../load/filesystem_sftp/bootstrap/billy_rsa | 49 +++++++++++++++++ .../bootstrap/billy_rsa-cert.pub | 1 + .../filesystem_sftp/bootstrap/billy_rsa.pub | 1 + tests/load/filesystem_sftp/bootstrap/ca_rsa | 49 +++++++++++++++++ .../load/filesystem_sftp/bootstrap/ca_rsa.pub | 1 + tests/load/filesystem_sftp/docker-compose.yml | 1 + .../filesystem_sftp/test_filesystem_sftp.py | 17 ++++++ 9 files changed, 217 insertions(+), 4 deletions(-) create mode 100644 tests/load/filesystem_sftp/bootstrap/SETUP.md create mode 100644 tests/load/filesystem_sftp/bootstrap/billy_rsa create mode 100644 tests/load/filesystem_sftp/bootstrap/billy_rsa-cert.pub create mode 100644 tests/load/filesystem_sftp/bootstrap/billy_rsa.pub create mode 100644 tests/load/filesystem_sftp/bootstrap/ca_rsa create mode 100644 tests/load/filesystem_sftp/bootstrap/ca_rsa.pub diff --git a/tests/load/filesystem_sftp/bootstrap/Dockerfile b/tests/load/filesystem_sftp/bootstrap/Dockerfile index b4966bfa46..76866c3b9e 100644 --- a/tests/load/filesystem_sftp/bootstrap/Dockerfile +++ b/tests/load/filesystem_sftp/bootstrap/Dockerfile @@ -6,6 +6,19 @@ RUN apt-get update && \ apt-get install -y openssh-server && \ rm -rf /var/lib/apt/lists/* +# Certificate Authority (CA): public key +COPY 
ca_rsa.pub /etc/ssh/ca_rsa.pub +RUN chmod 600 /etc/ssh/ca_rsa.pub + +RUN mkdir -p /etc/ssh/auth_principals && \ + echo "billy" >> /etc/ssh/auth_principals/billy + +RUN mkdir -p /run/sshd && \ + echo "SyslogFacility AUTH" >> /etc/ssh/sshd_config && \ + echo "LogLevel INFO" >> /etc/ssh/sshd_config && \ + echo "TrustedUserCAKeys /etc/ssh/ca_rsa.pub" >> /etc/ssh/sshd_config && \ + echo "AuthorizedPrincipalsFile /etc/ssh/auth_principals/billy" >> /etc/ssh/sshd_config + # USER foo: set up user for SFTP with no shell login RUN useradd -m -d /home/foo -s /usr/sbin/nologin foo && \ mkdir -p /home/foo/.ssh && \ @@ -27,9 +40,9 @@ RUN mkdir -p /home/foo/sftp/data && \ chown foo:foo /home/foo/sftp/data && \ chmod 755 /home/foo/sftp/data + # USER foo: configure SSH for SFTP: allow password and pubkey authentication -RUN mkdir -p /run/sshd && \ - echo "Match User foo" >> /etc/ssh/sshd_config && \ +RUN echo "Match User foo" >> /etc/ssh/sshd_config && \ echo " ChrootDirectory /home/foo/sftp" >> /etc/ssh/sshd_config && \ echo " ForceCommand internal-sftp" >> /etc/ssh/sshd_config && \ echo " PasswordAuthentication yes" >> /etc/ssh/sshd_config && \ @@ -58,8 +71,7 @@ RUN mkdir -p /home/bobby/sftp/data && \ chmod 755 /home/bobby/sftp/data # USER bobby: configure SSH for SFTP: allow password and pubkey authentication -RUN mkdir -p /run/sshd && \ - echo "Match User bobby" >> /etc/ssh/sshd_config && \ +RUN echo "Match User bobby" >> /etc/ssh/sshd_config && \ echo " ChrootDirectory /home/bobby/sftp" >> /etc/ssh/sshd_config && \ echo " ForceCommand internal-sftp" >> /etc/ssh/sshd_config && \ echo " PasswordAuthentication no" >> /etc/ssh/sshd_config && \ @@ -69,6 +81,39 @@ RUN mkdir -p /run/sshd && \ echo " AllowTcpForwarding no" >> /etc/ssh/sshd_config && \ echo " X11Forwarding no" >> /etc/ssh/sshd_config +# USER billy: set up user for SFTP with no shell login +RUN useradd -m -d /home/billy -s /usr/sbin/nologin billy && \ + mkdir -p /home/billy/.ssh && \ + chown billy:billy /home/billy/.ssh 
&& \ + chmod 700 /home/billy/.ssh + +# USER billy: create a directory for SFTP that the user will have access to +RUN mkdir -p /home/billy/sftp/data && \ + chown root:root /home/billy /home/billy/sftp && \ + chmod 755 /home/billy /home/billy/sftp && \ + chown billy:billy /home/billy/sftp/data && \ + chmod 755 /home/billy/sftp/data + +# USER billy: certificate signed with CA key +COPY billy_rsa-cert.pub /home/billy/.ssh/billy_rsa-cert.pub + +RUN chown billy:billy /home/billy/.ssh/billy_rsa-cert.pub && \ + chmod 600 /home/billy/.ssh/billy_rsa-cert.pub + +# USER billy: configure SSH for SFTP with certificate authentication +RUN echo "Match User billy" >> /etc/ssh/sshd_config && \ + echo " ChrootDirectory /home/billy/sftp" >> /etc/ssh/sshd_config && \ + echo " ForceCommand internal-sftp" >> /etc/ssh/sshd_config && \ + echo " PasswordAuthentication no" >> /etc/ssh/sshd_config && \ + echo " PubkeyAuthentication yes" >> /etc/ssh/sshd_config && \ + echo " PermitTunnel no" >> /etc/ssh/sshd_config && \ + echo " AllowAgentForwarding no" >> /etc/ssh/sshd_config && \ + echo " AllowTcpForwarding no" >> /etc/ssh/sshd_config && \ + echo " X11Forwarding no" >> /etc/ssh/sshd_config + + + + EXPOSE 22 # run sshd on container start diff --git a/tests/load/filesystem_sftp/bootstrap/SETUP.md b/tests/load/filesystem_sftp/bootstrap/SETUP.md new file mode 100644 index 0000000000..0620bbb075 --- /dev/null +++ b/tests/load/filesystem_sftp/bootstrap/SETUP.md @@ -0,0 +1,49 @@ +## Users: Authentication + +This guide covers the setup of different authentication methods for SSH users, including public/private key pairs, passphrase protection, and certificate-based authentication. 
+ +### User foo: Public/Private Key Pair Without Passphrase + +Generate a key pair for `foo` without a passphrase: +```bash +# Generate the key pair +ssh-keygen -t rsa -b 4096 -C "foo@example.com" -f foo_rsa + +# Secure the private key +chmod 600 foo_rsa +``` + +### User bobby: Public/Private Key Pair With Passphrase + +Generate a key pair for `bobby` with a passphrase (passphrase=passphrase123): +```bash +# Generate the key pair with a passphrase +ssh-keygen -t rsa -b 4096 -C "bobby@example.com" -f bobby_rsa + +# Secure the private key +chmod 600 bobby_rsa +``` + +### Certificate Authority (CA) Setup + +Generate the Certificate Authority (CA) key pair: +```bash +# Generate a self-signed CA key pair +ssh-keygen -t rsa -b 4096 -f ca_rsa -N "" +``` + +### User billy: Public/Private Key Pair with CA-Signed Certificate + +Generate and sign a key pair for `billy` using the CA: +```bash +# Generate the user key pair for billy +ssh-keygen -t rsa -b 4096 -C "billy@example.com" -f billy_rsa + +# Sign billy's public key with the CA +ssh-keygen -s ca_rsa -I billy-cert -n billy billy_rsa.pub +``` + +### Important Files + +- **ca_rsa.pub**: The CA public key. This key is used by the server to verify certificates. +- **billy_rsa-cert.pub**: Billy’s signed certificate. This certificate is used by Billy to authenticate with the server. 
diff --git a/tests/load/filesystem_sftp/bootstrap/billy_rsa b/tests/load/filesystem_sftp/bootstrap/billy_rsa new file mode 100644 index 0000000000..ceafa496e0 --- /dev/null +++ b/tests/load/filesystem_sftp/bootstrap/billy_rsa @@ -0,0 +1,49 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn +NhAAAAAwEAAQAAAgEAt6vSxFUsoMmftSc2sn7yd/GqN1QuD2DlCzdp+MNF2awKe4JXC+9v +OKDwYWk1DU1Z2r400hUFpsColXrO7kNJKdLNZw7tUcIwusXUWobh3zT0VCpompycJI1ylZ +HTX7cSwuSX43VRNmsj049sawTtwb+5yURsZuc8fLRX4x0EYjbqrP4TxEvumvXf2vjPQg+l +d4QCASTWNijN0aBRV+fnfCXlkkt+9PhyqUYFmLfd//JclEs5JEyOmsX68tUTx90XNKC4ef +0vYDnQsMTDujHOwjla9ZTVMvhXBg10ZUd+rmbWlwD9gCiIqL5YZg/hsJ9QEVYr+03hEq+q +nBAqRTwvFXaJHGUVNg7eCkE3w4RFSqU7FIwag9naHBQvbW0LLMAIpJqRPQ81J0LJtxXPsi +X+xt4Oah+jgWMAXtKkfzzZfu+BRDZ4F/TqRj7xgPrwsCgIFE6n3PpgnBRZb1IczQ0VFuGX +O2kvCpH9bJ5Rb8MEJyZ1z/GqiagBfDhUIXSzGLSxjGfNR6bmrrBL2J3PPviAgd1epjFhMj +nkeWysT9U17PoyiJQlVnwEAQo04pEWQLhrVBz0DSjH9noElhVURvvHClfkgYA1HrSCz+qH +Lu+1vgo9S/+14i2q0F/ILPGJob7dlZNIxZcLVnN2U72/1WS3z0DZ2xr7kMSqMQ3QiMzrez +UAAAdImHtW95h7VvcAAAAHc3NoLXJzYQAAAgEAt6vSxFUsoMmftSc2sn7yd/GqN1QuD2Dl +Czdp+MNF2awKe4JXC+9vOKDwYWk1DU1Z2r400hUFpsColXrO7kNJKdLNZw7tUcIwusXUWo +bh3zT0VCpompycJI1ylZHTX7cSwuSX43VRNmsj049sawTtwb+5yURsZuc8fLRX4x0EYjbq +rP4TxEvumvXf2vjPQg+ld4QCASTWNijN0aBRV+fnfCXlkkt+9PhyqUYFmLfd//JclEs5JE +yOmsX68tUTx90XNKC4ef0vYDnQsMTDujHOwjla9ZTVMvhXBg10ZUd+rmbWlwD9gCiIqL5Y +Zg/hsJ9QEVYr+03hEq+qnBAqRTwvFXaJHGUVNg7eCkE3w4RFSqU7FIwag9naHBQvbW0LLM +AIpJqRPQ81J0LJtxXPsiX+xt4Oah+jgWMAXtKkfzzZfu+BRDZ4F/TqRj7xgPrwsCgIFE6n +3PpgnBRZb1IczQ0VFuGXO2kvCpH9bJ5Rb8MEJyZ1z/GqiagBfDhUIXSzGLSxjGfNR6bmrr +BL2J3PPviAgd1epjFhMjnkeWysT9U17PoyiJQlVnwEAQo04pEWQLhrVBz0DSjH9noElhVU +RvvHClfkgYA1HrSCz+qHLu+1vgo9S/+14i2q0F/ILPGJob7dlZNIxZcLVnN2U72/1WS3z0 +DZ2xr7kMSqMQ3QiMzrezUAAAADAQABAAACAGJcgTYliw3cjEv+YRt8QqubXxXcXgJcWK0G +ExlmsgydRsvYBHfe1QhVoAX6yq+CAiHUBQOQwIMJ3/6VUyZkIFVmkBul8c/8GA6eDVghre 
+rUOBiu0XAjeLdWjMuu2a0DpOd8dh8Y39It1HTi9SPm8tD0tTElFIyTP2BlUD/PdV6HnGvi +cYGJTJHuJODmqE/vgySbLFuid5lvhrHBkm5qQhzJSsVq6uWT/AvM9QhRR3FfQNcl5ccbvK +8EYZ0UIVDChT/o5oJl5a45qBpSdaNMMzbucljcSMTQHAKgx/nJLP2HE1qw7BFrI6yCq2KV +FL96we1W4ZzmmydfrhV/zrNfRLLohPv6vbEho/cuMfZAwtdHunYJSV3e5bBNKbjTAIh7O2 +nIBDHLUFhRUZHJcWcs7n0gQgmD2cpIT71fyTo3+RV0hq1j6p8idoFxG3O+gs1a3tJ+3Koj +J1+2r2ocUkY2f2UJZtP2dE8c2ZK5ttd7rqv6s4d9sHn6n87hn4UKVf2O5Y7Mlje/v5aTn2 +9KjR0pkGe3WdMcBl8n7usq04TjBNChYjUk1l1dQ4Te66OQ6XDCqnLJxETPTgsa3iVyYjR0 +CNuleqW+fHGbd7RhmWS6mlTHKYo5dCcilBNxrFQUyJpPPGyDZdZRiYWk5/yqbSs30k20mH +gA+TJoiBiDRJYvySRtAAABABeBxEbk8oGH1cK1+4w4kpAg5zZ7cAFuibMOP3TnxQNIElBF +RGFGZJVGE+moYiHRWHBKtEbTKVqA+LUqStE2dj6wjxg7d6qdtUm/7Dw6ODSvWCI7YXTvys +YsWuktbmqii3VIy6ZOo6Hp0jdQOKnZni/Es9gfSpKA3f6vmc9sjiIFJBINhOOu7l7XiaCx +6tHuhypqAmnrQxtso7Pu2WEofNCwHc/c2QBpVTW32QOpfHc5MZ0gt9V4ozxhDa8sP6QQqA +h2KhOWc2tU0kIaespfoeFKPmNTbDVXT8uiJqHE6+RkEtlLFqLuL9o2U9mPErG1d5HUUkUj +HhtI3rWqeESPDEoAAAEBAN3U9dVp0G84XVtdPkFV8d5CxJZgM6Wwse1ZzGyy9GcO2vUhE8 +H+MistGl/18JA89UqkPEuWOVTM2ISj8mHpXBiEHOEQpm53sktO9BBBrqlfoUQ4/pgOuMzx +TvZozZ22XhyX+b4oVdwAcqzV9u/QwaPDZn5SKdVDNznbuFba0Oz5OcPT/O/0doTldMwiwI +PN4ptoCH+4b5gNKr6lcEtiFHwKJ89XUK0J3DIufcykjSQ0Ff/xeVR8C5yK44xTni26tpYS +VHJbJ5PiaLyGnIeBrA7atKtzayz1Vt+8h7RLkoFim2QT9782763CfFDyqzZndill089dwV +wg10ObtbNLaWMAAAEBANP2JqpTDzVV25BspY3o9tQK9wopr3DFfAQV2nG02e1ygwHgRcgA +hkEFYidJX3K4BbVaVoP19D0So1ERjXsmbDiTRgXRgZQteX+5xMDMDUMyCOZVY/gQPB7K3E +UeLHaHQiAqqwz9qhbtvfvjBkKwg6HgF+EfB97eFwBzkACYa+xQQrHTOpwM666vlEnKI6AI +8+KgOpzHs04cowIm0sUUVcn1eeOj8RTa7KAPp06hWUcQnCO9+Pb1hCS1GvXkGNUv0K4h1T +mogsfGdRKbeyG2izaPQde5pykB0G7INqzFZJ5Rgydc0khOMHFiv8grDKH43csa3IUMailM +8OBopUpl+IcAAAARYmlsbHlAZXhhbXBsZS5jb20BAg== +-----END OPENSSH PRIVATE KEY----- diff --git a/tests/load/filesystem_sftp/bootstrap/billy_rsa-cert.pub b/tests/load/filesystem_sftp/bootstrap/billy_rsa-cert.pub new file mode 100644 index 0000000000..147113c067 --- /dev/null +++ b/tests/load/filesystem_sftp/bootstrap/billy_rsa-cert.pub @@ -0,0 
+1 @@ +ssh-rsa-cert-v01@openssh.com AAAAHHNzaC1yc2EtY2VydC12MDFAb3BlbnNzaC5jb20AAAAgBOH3dEBDItfrfBWyMDp3/eonWDzVrHs5NAVMTk6EcYEAAAADAQABAAACAQC3q9LEVSygyZ+1JzayfvJ38ao3VC4PYOULN2n4w0XZrAp7glcL7284oPBhaTUNTVnavjTSFQWmwKiVes7uQ0kp0s1nDu1RwjC6xdRahuHfNPRUKmianJwkjXKVkdNftxLC5JfjdVE2ayPTj2xrBO3Bv7nJRGxm5zx8tFfjHQRiNuqs/hPES+6a9d/a+M9CD6V3hAIBJNY2KM3RoFFX5+d8JeWSS370+HKpRgWYt93/8lyUSzkkTI6axfry1RPH3Rc0oLh5/S9gOdCwxMO6Mc7COVr1lNUy+FcGDXRlR36uZtaXAP2AKIiovlhmD+Gwn1ARViv7TeESr6qcECpFPC8VdokcZRU2Dt4KQTfDhEVKpTsUjBqD2docFC9tbQsswAikmpE9DzUnQsm3Fc+yJf7G3g5qH6OBYwBe0qR/PNl+74FENngX9OpGPvGA+vCwKAgUTqfc+mCcFFlvUhzNDRUW4Zc7aS8Kkf1snlFvwwQnJnXP8aqJqAF8OFQhdLMYtLGMZ81HpuausEvYnc8++ICB3V6mMWEyOeR5bKxP1TXs+jKIlCVWfAQBCjTikRZAuGtUHPQNKMf2egSWFVRG+8cKV+SBgDUetILP6ocu77W+Cj1L/7XiLarQX8gs8Ymhvt2Vk0jFlwtWc3ZTvb/VZLfPQNnbGvuQxKoxDdCIzOt7NQAAAAAAAAAAAAAAAQAAAApiaWxseS1jZXJ0AAAACQAAAAViaWxseQAAAAAAAAAA//////////8AAAAAAAAAggAAABVwZXJtaXQtWDExLWZvcndhcmRpbmcAAAAAAAAAF3Blcm1pdC1hZ2VudC1mb3J3YXJkaW5nAAAAAAAAABZwZXJtaXQtcG9ydC1mb3J3YXJkaW5nAAAAAAAAAApwZXJtaXQtcHR5AAAAAAAAAA5wZXJtaXQtdXNlci1yYwAAAAAAAAAAAAACFwAAAAdzc2gtcnNhAAAAAwEAAQAAAgEA57aSrYecHkvXEPQYZ3ZXyld2bJ3CeuD+rmEtbGFYuJ4NjobBTle1fBDULcemeIs2P7rsybixIMVgAhbVRw9Dkm+8yXJXaUib64m5AkPtDrwPZh6ipxAn6EuuRc0bUdw8JGrLk6j4Pb2OClHAI9SEvvhYgdE+2bf2xMvpI63g4elBb7Z0SzKWdPnBbyCDmdQTcC5Fz/8sYzrx/MAhqwZ1kFLr2OahsajpjuJJnL7K7rL+JN2OiBj9eh4Of2IX15FMFV8LHdLsOxn9E3oOQQxNfkXyT/qcyd2SwqmJHEM6AyQuWkWKowtjrLX410VJMUFw8w9q7oSW+Wr6UkTZXg74W+jfRjOBt6CO3I+glEdpeJAV+odaY8Sc3vv+sSRNo+O0Yzskq1voxpw5xQIdFZIzQmN5T5M8bNFVoM0dZ61MiC8gYWAj3w/kUF7hjb4PW2MTZSxP11J7sR9vdV86bos9r1kad6QEiowkJBRYTGPOZZEj0X/1JjrETahFXjV8696Reub5vPzVcizMKQ93iJNMNf/QX4Xrb3FVxmA0HyM6iXM/oLWCQ4A4hdnzONS4e6hURw9BUF7094/dea7Agaw9V97YDXyBBUx80kfwMWdGMYIvRlWJiGt3KrgPERJsx7CCJfPWz3A/Gbgrk9cZXXB5K2gJpQAWv1UpZCWwc2gQqu0AAAIUAAAADHJzYS1zaGEyLTUxMgAAAgCmUjbVFDwtQV6DSV1LO5Xj2MQKaI739Z06PfTadFQ2Ppcp7cmGdQ0AD2HiqiwDcYW4qYpTLB+P4vavExIPrSyX40fTCgDOXJc7SvXQlnbKkNcuQNn9b6EfLMGS+wXdjDHR5rdUTYrsz3KLhJznahECT2ZQdeuNGKcD
PFjU38XmslTeSeExjTu3bxY3bqlAnXMD3k7KG/q7H4sa3um9tV/2iycRxTk1GWk2thifMkxVKNmvPSruqI7EKo52dTkYVVM0OgwHK9etaKs8cc4FFyuaPPUBVO76hwYUoUbdfMbIOAQ7J1mnjTB+HkPhbz4/eu9bhcwXII1I/lxN4+NDL+7CQls7+WjrqlR8zU296/lN/xN+ZK/e8cZVaDUxcM0/1YugXTxnwGeosHd2Jj64p7w/F8wKgzy6EEEvshBANH+DsDm47RHzf8lV3JdonLs9J8byejP/6A7pvU9tqhIL7Gb0anKyxyC7BT9S361AVFo7s1ANH8j8pVc7hU2q4D9OPWPx2l5nO9WOTuzdrq6BtEw0s5qaRsXguJVcW5ba7wIPCgu5LRDGNW6Hb244Lce1HZo+aSz8JbAJXpb/TM9eWz3Z2xjnmkJrhtmvck7Ykix0Mt6j9bLGRv5pSLZIYLfVHJcdgsnQPT06c/1Cyo5gYTeeflrqK4O3TIyoBurA0rVtew== billy@example.com diff --git a/tests/load/filesystem_sftp/bootstrap/billy_rsa.pub b/tests/load/filesystem_sftp/bootstrap/billy_rsa.pub new file mode 100644 index 0000000000..8b74bdb00b --- /dev/null +++ b/tests/load/filesystem_sftp/bootstrap/billy_rsa.pub @@ -0,0 +1 @@ +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC3q9LEVSygyZ+1JzayfvJ38ao3VC4PYOULN2n4w0XZrAp7glcL7284oPBhaTUNTVnavjTSFQWmwKiVes7uQ0kp0s1nDu1RwjC6xdRahuHfNPRUKmianJwkjXKVkdNftxLC5JfjdVE2ayPTj2xrBO3Bv7nJRGxm5zx8tFfjHQRiNuqs/hPES+6a9d/a+M9CD6V3hAIBJNY2KM3RoFFX5+d8JeWSS370+HKpRgWYt93/8lyUSzkkTI6axfry1RPH3Rc0oLh5/S9gOdCwxMO6Mc7COVr1lNUy+FcGDXRlR36uZtaXAP2AKIiovlhmD+Gwn1ARViv7TeESr6qcECpFPC8VdokcZRU2Dt4KQTfDhEVKpTsUjBqD2docFC9tbQsswAikmpE9DzUnQsm3Fc+yJf7G3g5qH6OBYwBe0qR/PNl+74FENngX9OpGPvGA+vCwKAgUTqfc+mCcFFlvUhzNDRUW4Zc7aS8Kkf1snlFvwwQnJnXP8aqJqAF8OFQhdLMYtLGMZ81HpuausEvYnc8++ICB3V6mMWEyOeR5bKxP1TXs+jKIlCVWfAQBCjTikRZAuGtUHPQNKMf2egSWFVRG+8cKV+SBgDUetILP6ocu77W+Cj1L/7XiLarQX8gs8Ymhvt2Vk0jFlwtWc3ZTvb/VZLfPQNnbGvuQxKoxDdCIzOt7NQ== billy@example.com diff --git a/tests/load/filesystem_sftp/bootstrap/ca_rsa b/tests/load/filesystem_sftp/bootstrap/ca_rsa new file mode 100644 index 0000000000..6616331a46 --- /dev/null +++ b/tests/load/filesystem_sftp/bootstrap/ca_rsa @@ -0,0 +1,49 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn +NhAAAAAwEAAQAAAgEA57aSrYecHkvXEPQYZ3ZXyld2bJ3CeuD+rmEtbGFYuJ4NjobBTle1 
+fBDULcemeIs2P7rsybixIMVgAhbVRw9Dkm+8yXJXaUib64m5AkPtDrwPZh6ipxAn6EuuRc +0bUdw8JGrLk6j4Pb2OClHAI9SEvvhYgdE+2bf2xMvpI63g4elBb7Z0SzKWdPnBbyCDmdQT +cC5Fz/8sYzrx/MAhqwZ1kFLr2OahsajpjuJJnL7K7rL+JN2OiBj9eh4Of2IX15FMFV8LHd +LsOxn9E3oOQQxNfkXyT/qcyd2SwqmJHEM6AyQuWkWKowtjrLX410VJMUFw8w9q7oSW+Wr6 +UkTZXg74W+jfRjOBt6CO3I+glEdpeJAV+odaY8Sc3vv+sSRNo+O0Yzskq1voxpw5xQIdFZ +IzQmN5T5M8bNFVoM0dZ61MiC8gYWAj3w/kUF7hjb4PW2MTZSxP11J7sR9vdV86bos9r1ka +d6QEiowkJBRYTGPOZZEj0X/1JjrETahFXjV8696Reub5vPzVcizMKQ93iJNMNf/QX4Xrb3 +FVxmA0HyM6iXM/oLWCQ4A4hdnzONS4e6hURw9BUF7094/dea7Agaw9V97YDXyBBUx80kfw +MWdGMYIvRlWJiGt3KrgPERJsx7CCJfPWz3A/Gbgrk9cZXXB5K2gJpQAWv1UpZCWwc2gQqu +0AAAdYWw35zlsN+c4AAAAHc3NoLXJzYQAAAgEA57aSrYecHkvXEPQYZ3ZXyld2bJ3CeuD+ +rmEtbGFYuJ4NjobBTle1fBDULcemeIs2P7rsybixIMVgAhbVRw9Dkm+8yXJXaUib64m5Ak +PtDrwPZh6ipxAn6EuuRc0bUdw8JGrLk6j4Pb2OClHAI9SEvvhYgdE+2bf2xMvpI63g4elB +b7Z0SzKWdPnBbyCDmdQTcC5Fz/8sYzrx/MAhqwZ1kFLr2OahsajpjuJJnL7K7rL+JN2OiB +j9eh4Of2IX15FMFV8LHdLsOxn9E3oOQQxNfkXyT/qcyd2SwqmJHEM6AyQuWkWKowtjrLX4 +10VJMUFw8w9q7oSW+Wr6UkTZXg74W+jfRjOBt6CO3I+glEdpeJAV+odaY8Sc3vv+sSRNo+ +O0Yzskq1voxpw5xQIdFZIzQmN5T5M8bNFVoM0dZ61MiC8gYWAj3w/kUF7hjb4PW2MTZSxP +11J7sR9vdV86bos9r1kad6QEiowkJBRYTGPOZZEj0X/1JjrETahFXjV8696Reub5vPzVci +zMKQ93iJNMNf/QX4Xrb3FVxmA0HyM6iXM/oLWCQ4A4hdnzONS4e6hURw9BUF7094/dea7A +gaw9V97YDXyBBUx80kfwMWdGMYIvRlWJiGt3KrgPERJsx7CCJfPWz3A/Gbgrk9cZXXB5K2 +gJpQAWv1UpZCWwc2gQqu0AAAADAQABAAACAQDf6BPK7c0VlZGoR0fByqDA3U2JXdUNapeQ +KEV9gn/6Pni5LkxjiOvNjHGMH24k89kjMl0X/InlMfU5b5j+xqg0PBdAzmXbThelPntxzf +OoFmAG/TmNmFYH6gHoX2+z5c6UHkLEWDYxaxdzr1WtCfKQm2jjYTCLsHIb7zNFkANMKlta +OQQdmi+7r5CU8uVYY4+5Cm3ZtSnOZapq47plCqPWJ9rRyOOcyq2OreoWNmPgz0Q4mYPezc +87DwpRW5fQbg4IZMRYOHagEdf01zAo+Vt01uB1GqYhhyQHSU496lSHrpHAF40FSr8xqNJi +Kwi0ORmNNmvNOoCtWgu0LeWiN3MTflJFOTVzXD2+GB59BR8O2mE5akNOce3goDcPDz5rYA +onFi3KRqm6rA1pQKGkTVBCzCt4rEOeZ69yaGcNZl7qv4sQw1FOFjKsmOjXW9hVwDuHZU37 +bruCNrEHnFh27KDic0EptKypDC07Q+PmC91gosuGzM3U8fyB08m/YCFAd+WXUQSlIJebxA 
+SjzneIoA7FBUf8l5Cr9vzaTiq66+Epj2uwMy6EssVmsY8Ey++qOYEwVccTMV5EH0/AdZUh +T7ynjQH81whLYh6CrmxFcdUtvkQeQbGInqQtOVG592CIZM+r/YmyRzuFIxEkT2RWVLJaJL +l0w9qehS3FP7nVs1kgvQAAAQBvxrDXLnGVocaBE4gSVnS3gUd00W6iY44m4aJLwC2PItNF +SAXYhCGtk+x/fE3LE5kyPM2N0Lp2hU2+a4+go3uz8o1WkbQX4vFIv2mUzUyjjBq8bAIHVM +tRjiQaVda8s79d9zpQYhB7hCicpNBR2CMiV4JBeYD46W4ds0cDCOb/gsUgrAOAatCq8B8K +vpepzIm8SNUkhLkTpP9MaekGdXktPyeIrMm7RD/ZBfdu7TiQidNXNErBr3IMLZnUARc9Kz +PWnMOayqPeCpVY+/Dl6jVx9F2QTc9BUcovltluRcT4ov9iPNpPVIOqV6iyU0SxjQGwU3Rl +Rzmf+lUKsiYZCp6CAAABAQD96tImcK0UE5AwSM1B5LSQIfyRNiXVNJXWmHZ4+hd3CztMXf +wZFtQNvxitK+ZD1FqMW3S6Gpy7Qn6BKl0qz+6e4oQdkdJ9wqkxTBaPL1y8rGscTYt/BGZH +ESI0xT3VnkDJNmrX0X5SzSJqFPb82Ei1UYAIypL5xVNc/lNXIrOADEnzjkEbh/AcPyQAjP +PLZI8T1qsv/gWw6bEI2tZWOZDIcPr10THMPvLG6nih7pwENsnI+TSRkyhPNj7nd0ZcNPnk +hGhjL2tFdus6ybZ5Y6UEgMjPISnGaGKhn8KTitn1LHrAKQQQPmeuLynTkDOAEuZ4/pA0J7 +i+4IB/y2uHrEhbAAABAQDpnSCpsKHT+51C1GEWHccowKaMC90O/TuuOBSKT29Xef8+q5Uo +U4lsVTYQTIKzXgtNaGKJLC7RdshIEiTZIT3V/VrPkpKu0jqpeRZCm2nOpabB/1/m0z+pxF +M3hb8svIMwlcw3m6ljbpuyDVHOMKHcRQS2IcalOqTehutlI5nlJsz9BlOmO4Wl0G1b/8dw +B630j7e9vBTuhbLhfjP+AExtU1xCQfkNKVyWVgDYJweWUzBicHKZN6IoTnW3DKpEue10mT +kXJb+MEgW8w4HODogYVvNQDf84NqC9up1zdfYqtcf+slfFcWyC/2NF4413ZnCQGPHTIKke +sqDOI+iF6HxXAAAAG2p1bGlhbkBKdWxpYW5zLUxhcHRvcC5sb2NhbAECAwQFBgc= +-----END OPENSSH PRIVATE KEY----- diff --git a/tests/load/filesystem_sftp/bootstrap/ca_rsa.pub b/tests/load/filesystem_sftp/bootstrap/ca_rsa.pub new file mode 100644 index 0000000000..e39fa19142 --- /dev/null +++ b/tests/load/filesystem_sftp/bootstrap/ca_rsa.pub @@ -0,0 +1 @@ +ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQDntpKth5weS9cQ9BhndlfKV3ZsncJ64P6uYS1sYVi4ng2OhsFOV7V8ENQtx6Z4izY/uuzJuLEgxWACFtVHD0OSb7zJcldpSJvribkCQ+0OvA9mHqKnECfoS65FzRtR3DwkasuTqPg9vY4KUcAj1IS++FiB0T7Zt/bEy+kjreDh6UFvtnRLMpZ0+cFvIIOZ1BNwLkXP/yxjOvH8wCGrBnWQUuvY5qGxqOmO4kmcvsrusv4k3Y6IGP16Hg5/YhfXkUwVXwsd0uw7Gf0Teg5BDE1+RfJP+pzJ3ZLCqYkcQzoDJC5aRYqjC2OstfjXRUkxQXDzD2ruhJb5avpSRNleDvhb6N9GM4G3oI7cj6CUR2l4kBX6h1pjxJze+/6xJE2j47RjOySrW+jGnDnFAh0VkjNCY3lPkzxs0VWgzR1nrUyILyBhYCPfD+RQXuGNvg9bYxNlLE/XUnuxH291Xzpuiz2vWRp3pASKjCQkFFhMY85lkSPRf/UmOsRNqEVeNXzr3pF65vm8/NVyLMwpD3eIk0w1/9BfhetvcVXGYDQfIzqJcz+gtYJDgDiF2fM41Lh7qFRHD0FQXvT3j915rsCBrD1X3tgNfIEFTHzSR/AxZ0Yxgi9GVYmIa3cquA8REmzHsIIl89bPcD8ZuCuT1xldcHkraAmlABa/VSlkJbBzaBCq7Q== julian@Julians-Laptop.local diff --git a/tests/load/filesystem_sftp/docker-compose.yml b/tests/load/filesystem_sftp/docker-compose.yml index d9e992a8cd..3421b316ca 100644 --- a/tests/load/filesystem_sftp/docker-compose.yml +++ b/tests/load/filesystem_sftp/docker-compose.yml @@ -14,6 +14,7 @@ services: volumes: - ../../common/storages/samples:/home/foo/sftp/data/samples - ../../common/storages/samples:/home/bobby/sftp/data/samples + - ../../common/storages/samples:/home/billy/sftp/data/samples networks: sftpserver: diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index f8274751a8..d388952668 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -212,3 +212,20 @@ def test_filesystem_sftp_auth_private_ssh_agent(): files = fs.ls("/data/samples") assert len(files) > 0 + + +@pytest.mark.sftp +def test_filesystem_sftp_auth_ca_signed_pub_key(): + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "billy" + os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = 
get_key_path( + "billy" + ) # billy_rsa-cert.pub is automatically loaded too + + config = get_config() + fs, _ = fsspec_from_config(config) + + files = fs.ls("/data/samples") + + assert len(files) > 0 From 08f07d33c7dcecf9885dfb361160bf7c143bb414 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:46:58 +0200 Subject: [PATCH 32/41] fix: poetry lock regenerated --- poetry.lock | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index c9bd601c42..e845e5e672 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "about-time" @@ -3789,6 +3789,106 @@ files = [ {file = "google_re2-1.1-4-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f4d4f0823e8b2f6952a145295b1ff25245ce9bb136aff6fe86452e507d4c1dd"}, {file = "google_re2-1.1-4-cp39-cp39-win32.whl", hash = "sha256:1afae56b2a07bb48cfcfefaa15ed85bae26a68f5dc7f9e128e6e6ea36914e847"}, {file = "google_re2-1.1-4-cp39-cp39-win_amd64.whl", hash = "sha256:aa7d6d05911ab9c8adbf3c225a7a120ab50fd2784ac48f2f0d140c0b7afc2b55"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:222fc2ee0e40522de0b21ad3bc90ab8983be3bf3cec3d349c80d76c8bb1a4beb"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d4763b0b9195b72132a4e7de8e5a9bf1f05542f442a9115aa27cfc2a8004f581"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:209649da10c9d4a93d8a4d100ecbf9cc3b0252169426bec3e8b4ad7e57d600cf"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:68813aa333c1604a2df4a495b2a6ed065d7c8aebf26cc7e7abb5a6835d08353c"}, + {file = 
"google_re2-1.1-5-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:370a23ec775ad14e9d1e71474d56f381224dcf3e72b15d8ca7b4ad7dd9cd5853"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:14664a66a3ddf6bc9e56f401bf029db2d169982c53eff3f5876399104df0e9a6"}, + {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea3722cc4932cbcebd553b69dce1b4a73572823cff4e6a244f1c855da21d511"}, + {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e14bb264c40fd7c627ef5678e295370cd6ba95ca71d835798b6e37502fc4c690"}, + {file = "google_re2-1.1-5-cp310-cp310-win32.whl", hash = "sha256:39512cd0151ea4b3969c992579c79b423018b464624ae955be685fc07d94556c"}, + {file = "google_re2-1.1-5-cp310-cp310-win_amd64.whl", hash = "sha256:ac66537aa3bc5504320d922b73156909e3c2b6da19739c866502f7827b3f9fdf"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5b5ea68d54890c9edb1b930dcb2658819354e5d3f2201f811798bbc0a142c2b4"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:33443511b6b83c35242370908efe2e8e1e7cae749c766b2b247bf30e8616066c"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:413d77bdd5ba0bfcada428b4c146e87707452ec50a4091ec8e8ba1413d7e0619"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:5171686e43304996a34baa2abcee6f28b169806d0e583c16d55e5656b092a414"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b284db130283771558e31a02d8eb8fb756156ab98ce80035ae2e9e3a5f307c4"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:296e6aed0b169648dc4b870ff47bd34c702a32600adb9926154569ef51033f47"}, + {file = "google_re2-1.1-5-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38d50e68ead374160b1e656bbb5d101f0b95fb4cc57f4a5c12100155001480c5"}, + {file = 
"google_re2-1.1-5-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a0416a35921e5041758948bcb882456916f22845f66a93bc25070ef7262b72a"}, + {file = "google_re2-1.1-5-cp311-cp311-win32.whl", hash = "sha256:a1d59568bbb5de5dd56dd6cdc79907db26cce63eb4429260300c65f43469e3e7"}, + {file = "google_re2-1.1-5-cp311-cp311-win_amd64.whl", hash = "sha256:72f5a2f179648b8358737b2b493549370debd7d389884a54d331619b285514e3"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:cbc72c45937b1dc5acac3560eb1720007dccca7c9879138ff874c7f6baf96005"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5fadd1417fbef7235fa9453dba4eb102e6e7d94b1e4c99d5fa3dd4e288d0d2ae"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:040f85c63cc02696485b59b187a5ef044abe2f99b92b4fb399de40b7d2904ccc"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:64e3b975ee6d9bbb2420494e41f929c1a0de4bcc16d86619ab7a87f6ea80d6bd"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8ee370413e00f4d828eaed0e83b8af84d7a72e8ee4f4bd5d3078bc741dfc430a"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:5b89383001079323f693ba592d7aad789d7a02e75adb5d3368d92b300f5963fd"}, + {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:63cb4fdfbbda16ae31b41a6388ea621510db82feb8217a74bf36552ecfcd50ad"}, + {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ebedd84ae8be10b7a71a16162376fd67a2386fe6361ef88c622dcf7fd679daf"}, + {file = "google_re2-1.1-5-cp312-cp312-win32.whl", hash = "sha256:c8e22d1692bc2c81173330c721aff53e47ffd3c4403ff0cd9d91adfd255dd150"}, + {file = "google_re2-1.1-5-cp312-cp312-win_amd64.whl", hash = "sha256:5197a6af438bb8c4abda0bbe9c4fbd6c27c159855b211098b29d51b73e4cbcf6"}, + {file = 
"google_re2-1.1-5-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b6727e0b98417e114b92688ad2aa256102ece51f29b743db3d831df53faf1ce3"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:711e2b6417eb579c61a4951029d844f6b95b9b373b213232efd413659889a363"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:71ae8b3df22c5c154c8af0f0e99d234a450ef1644393bc2d7f53fc8c0a1e111c"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:94a04e214bc521a3807c217d50cf099bbdd0c0a80d2d996c0741dbb995b5f49f"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:a770f75358508a9110c81a1257721f70c15d9bb592a2fb5c25ecbd13566e52a5"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:07c9133357f7e0b17c6694d5dcb82e0371f695d7c25faef2ff8117ef375343ff"}, + {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:204ca6b1cf2021548f4a9c29ac015e0a4ab0a7b6582bf2183d838132b60c8fda"}, + {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f0b95857c2c654f419ca684ec38c9c3325c24e6ba7d11910a5110775a557bb18"}, + {file = "google_re2-1.1-5-cp38-cp38-win32.whl", hash = "sha256:347ac770e091a0364e822220f8d26ab53e6fdcdeaec635052000845c5a3fb869"}, + {file = "google_re2-1.1-5-cp38-cp38-win_amd64.whl", hash = "sha256:ec32bb6de7ffb112a07d210cf9f797b7600645c2d5910703fa07f456dd2150e0"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb5adf89060f81c5ff26c28e261e6b4997530a923a6093c9726b8dec02a9a326"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a22630c9dd9ceb41ca4316bccba2643a8b1d5c198f21c00ed5b50a94313aaf10"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:544dc17fcc2d43ec05f317366375796351dec44058e1164e03c3f7d050284d58"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_x86_64.whl", hash = 
"sha256:19710af5ea88751c7768575b23765ce0dfef7324d2539de576f75cdc319d6654"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:f82995a205e08ad896f4bd5ce4847c834fab877e1772a44e5f262a647d8a1dec"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:63533c4d58da9dc4bc040250f1f52b089911699f0368e0e6e15f996387a984ed"}, + {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79e00fcf0cb04ea35a22b9014712d448725ce4ddc9f08cc818322566176ca4b0"}, + {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc41afcefee2da6c4ed883a93d7f527c4b960cd1d26bbb0020a7b8c2d341a60a"}, + {file = "google_re2-1.1-5-cp39-cp39-win32.whl", hash = "sha256:486730b5e1f1c31b0abc6d80abe174ce4f1188fe17d1b50698f2bf79dc6e44be"}, + {file = "google_re2-1.1-5-cp39-cp39-win_amd64.whl", hash = "sha256:4de637ca328f1d23209e80967d1b987d6b352cd01b3a52a84b4d742c69c3da6c"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:621e9c199d1ff0fdb2a068ad450111a84b3bf14f96dfe5a8a7a0deae5f3f4cce"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:220acd31e7dde95373f97c3d1f3b3bd2532b38936af28b1917ee265d25bebbf4"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:db34e1098d164f76251a6ece30e8f0ddfd65bb658619f48613ce71acb3f9cbdb"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:5152bac41d8073977582f06257219541d0fc46ad99b0bbf30e8f60198a43b08c"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:6191294799e373ee1735af91f55abd23b786bdfd270768a690d9d55af9ea1b0d"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:070cbafbb4fecbb02e98feb28a1eb292fb880f434d531f38cc33ee314b521f1f"}, + {file = "google_re2-1.1-6-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:8437d078b405a59a576cbed544490fe041140f64411f2d91012e8ec05ab8bf86"}, + {file = "google_re2-1.1-6-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f00f9a9af8896040e37896d9b9fc409ad4979f1ddd85bb188694a7d95ddd1164"}, + {file = "google_re2-1.1-6-cp310-cp310-win32.whl", hash = "sha256:df26345f229a898b4fd3cafd5f82259869388cee6268fc35af16a8e2293dd4e5"}, + {file = "google_re2-1.1-6-cp310-cp310-win_amd64.whl", hash = "sha256:3665d08262c57c9b28a5bdeb88632ad792c4e5f417e5645901695ab2624f5059"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b26b869d8aa1d8fe67c42836bf3416bb72f444528ee2431cfb59c0d3e02c6ce3"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:41fd4486c57dea4f222a6bb7f1ff79accf76676a73bdb8da0fcbd5ba73f8da71"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:0ee378e2e74e25960070c338c28192377c4dd41e7f4608f2688064bd2badc41e"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:a00cdbf662693367b36d075b29feb649fd7ee1b617cf84f85f2deebeda25fc64"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4c09455014217a41499432b8c8f792f25f3df0ea2982203c3a8c8ca0e7895e69"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6501717909185327935c7945e23bb5aa8fc7b6f237b45fe3647fa36148662158"}, + {file = "google_re2-1.1-6-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3510b04790355f199e7861c29234081900e1e1cbf2d1484da48aa0ba6d7356ab"}, + {file = "google_re2-1.1-6-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8c0e64c187ca406764f9e9ad6e750d62e69ed8f75bf2e865d0bfbc03b642361c"}, + {file = "google_re2-1.1-6-cp311-cp311-win32.whl", hash = "sha256:2a199132350542b0de0f31acbb3ca87c3a90895d1d6e5235f7792bb0af02e523"}, + {file = "google_re2-1.1-6-cp311-cp311-win_amd64.whl", hash = 
"sha256:83bdac8ceaece8a6db082ea3a8ba6a99a2a1ee7e9f01a9d6d50f79c6f251a01d"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:81985ff894cd45ab5a73025922ac28c0707759db8171dd2f2cc7a0e856b6b5ad"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5635af26065e6b45456ccbea08674ae2ab62494008d9202df628df3b267bc095"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:813b6f04de79f4a8fdfe05e2cb33e0ccb40fe75d30ba441d519168f9d958bd54"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:5ec2f5332ad4fd232c3f2d6748c2c7845ccb66156a87df73abcc07f895d62ead"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5a687b3b32a6cbb731647393b7c4e3fde244aa557f647df124ff83fb9b93e170"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:39a62f9b3db5d3021a09a47f5b91708b64a0580193e5352751eb0c689e4ad3d7"}, + {file = "google_re2-1.1-6-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ca0f0b45d4a1709cbf5d21f355e5809ac238f1ee594625a1e5ffa9ff7a09eb2b"}, + {file = "google_re2-1.1-6-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a64b3796a7a616c7861247bd061c9a836b5caf0d5963e5ea8022125601cf7b09"}, + {file = "google_re2-1.1-6-cp312-cp312-win32.whl", hash = "sha256:32783b9cb88469ba4cd9472d459fe4865280a6b1acdad4480a7b5081144c4eb7"}, + {file = "google_re2-1.1-6-cp312-cp312-win_amd64.whl", hash = "sha256:259ff3fd2d39035b9cbcbf375995f83fa5d9e6a0c5b94406ff1cc168ed41d6c6"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:e4711bcffe190acd29104d8ecfea0c0e42b754837de3fb8aad96e6cc3c613cdc"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:4d081cce43f39c2e813fe5990e1e378cbdb579d3f66ded5bade96130269ffd75"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_13_0_arm64.whl", hash = 
"sha256:4f123b54d48450d2d6b14d8fad38e930fb65b5b84f1b022c10f2913bd956f5b5"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:e1928b304a2b591a28eb3175f9db7f17c40c12cf2d4ec2a85fdf1cc9c073ff91"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:3a69f76146166aec1173003c1f547931bdf288c6b135fda0020468492ac4149f"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:fc08c388f4ebbbca345e84a0c56362180d33d11cbe9ccfae663e4db88e13751e"}, + {file = "google_re2-1.1-6-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b057adf38ce4e616486922f2f47fc7d19c827ba0a7f69d540a3664eba2269325"}, + {file = "google_re2-1.1-6-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4138c0b933ab099e96f5d8defce4486f7dfd480ecaf7f221f2409f28022ccbc5"}, + {file = "google_re2-1.1-6-cp38-cp38-win32.whl", hash = "sha256:9693e45b37b504634b1abbf1ee979471ac6a70a0035954592af616306ab05dd6"}, + {file = "google_re2-1.1-6-cp38-cp38-win_amd64.whl", hash = "sha256:5674d437baba0ea287a5a7f8f81f24265d6ae8f8c09384e2ef7b6f84b40a7826"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7783137cb2e04f458a530c6d0ee9ef114815c1d48b9102f023998c371a3b060e"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a49b7153935e7a303675f4deb5f5d02ab1305adefc436071348706d147c889e0"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:a96a8bb309182090704593c60bdb369a2756b38fe358bbf0d40ddeb99c71769f"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:dff3d4be9f27ef8ec3705eed54f19ef4ab096f5876c15fe011628c69ba3b561c"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:40f818b0b39e26811fa677978112a8108269977fdab2ba0453ac4363c35d9e66"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:8a7e53538cdb40ef4296017acfbb05cab0c19998be7552db1cfb85ba40b171b9"}, 
+ {file = "google_re2-1.1-6-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ee18e7569fb714e5bb8c42809bf8160738637a5e71ed5a4797757a1fb4dc4de"}, + {file = "google_re2-1.1-6-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1cda4f6d1a7d5b43ea92bc395f23853fba0caf8b1e1efa6e8c48685f912fcb89"}, + {file = "google_re2-1.1-6-cp39-cp39-win32.whl", hash = "sha256:6a9cdbdc36a2bf24f897be6a6c85125876dc26fea9eb4247234aec0decbdccfd"}, + {file = "google_re2-1.1-6-cp39-cp39-win_amd64.whl", hash = "sha256:73f646cecfad7cc5b4330b4192c25f2e29730a3b8408e089ffd2078094208196"}, ] [[package]] @@ -9757,4 +9857,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "0a6a967487e09eba41371f40abd2ced88c83621d82320e09847ddb60a2fe3a4c" +content-hash = "1932fafe0cb36b059cb9ae464ebcd7e6e5b8b4f8eb2d1f014d33338cae939748" From 87ab87a63b7e304c6d935a073994c31f22bf0cf5 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 12 Sep 2024 05:26:26 +0200 Subject: [PATCH 33/41] refactor: filesystem sftp tests --- .github/workflows/test_destination_sftp.yml | 88 ------------------- .github/workflows/test_destinations.yml | 2 +- .github/workflows/test_local_destinations.yml | 27 +++++- pytest.ini | 2 +- tests/.dlt/config.toml | 1 + tests/load/filesystem_sftp/__init__.py | 5 ++ .../filesystem_sftp/test_filesystem_sftp.py | 11 +-- tests/load/utils.py | 3 + 8 files changed, 36 insertions(+), 103 deletions(-) delete mode 100644 .github/workflows/test_destination_sftp.yml diff --git a/.github/workflows/test_destination_sftp.yml b/.github/workflows/test_destination_sftp.yml deleted file mode 100644 index 6eb9e63975..0000000000 --- a/.github/workflows/test_destination_sftp.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: dest | sftp filesystem - -on: - pull_request: - branches: - - master - - devel - workflow_dispatch: - -concurrency: - group: ${{ github.workflow 
}}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -env: - # NOTE: this workflow can't use github secrets! - # DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} - - RUNTIME__SENTRY_DSN: https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752 - RUNTIME__LOG_LEVEL: ERROR - RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }} - ACTIVE_DESTINATIONS: "[\"filesystem\"]" - ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\"]" - -jobs: - get_docs_changes: - name: docs changes - uses: ./.github/workflows/get_docs_changes.yml - - run_loader: - name: dest | sftp filesystem - needs: get_docs_changes - if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' - strategy: - fail-fast: false - defaults: - run: - shell: bash - runs-on: "ubuntu-latest" - - steps: - - name: Check out - uses: actions/checkout@master - - - name: Start SFTP server - run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d - - - name: Configure local SSH agent for tests - run: | - mkdir -p /home/runner/.ssh - cp tests/load/filesystem_sftp/bootstrap/bobby_rsa /home/runner/.ssh/id_rsa - cp tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub /home/runner/.ssh/id_rsa.pub - eval "$(ssh-agent -s)" - - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: "3.10.x" - - - name: Install Poetry - uses: snok/install-poetry@v1.3.2 - with: - virtualenvs-create: true - virtualenvs-in-project: true - installer-parallel: true - - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v3 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp - - - name: Install dependencies - run: poetry install --no-interaction -E filesystem --with sentry-sdk --with pipeline - - - name: create secrets.toml - run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml - - - name: Run 
SFTP Linux tests - run: | - eval "$(ssh-agent -s)" - ls -lha /home/runner/.ssh - poetry run pytest tests/load/filesystem_sftp -m "sftp" - - - - name: Stop SFTP server - if: always() - run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" down -v diff --git a/.github/workflows/test_destinations.yml b/.github/workflows/test_destinations.yml index 4cd83a1a21..fc7eeadfe2 100644 --- a/.github/workflows/test_destinations.yml +++ b/.github/workflows/test_destinations.yml @@ -87,6 +87,6 @@ jobs: if: ${{ ! (contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule')}} - run: | - poetry run pytest tests/load --ignore tests/load/sources -m "not sftp" + poetry run pytest tests/load --ignore tests/load/sources name: Run all tests Linux if: ${{ contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule'}} diff --git a/.github/workflows/test_local_destinations.yml b/.github/workflows/test_local_destinations.yml index 5b48f75dbc..44079e1808 100644 --- a/.github/workflows/test_local_destinations.yml +++ b/.github/workflows/test_local_destinations.yml @@ -22,7 +22,7 @@ env: RUNTIME__LOG_LEVEL: ERROR RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }} ACTIVE_DESTINATIONS: "[\"duckdb\", \"postgres\", \"filesystem\", \"weaviate\", \"qdrant\"]" - ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\"]" + ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\", \"sftp\"]" DESTINATION__WEAVIATE__VECTORIZER: text2vec-contextionary DESTINATION__WEAVIATE__MODULE_CONFIG: "{\"text2vec-contextionary\": {\"vectorizeClassName\": false, \"vectorizePropertyName\": true}}" @@ -96,15 +96,34 @@ jobs: - name: Install dependencies run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant --with sentry-sdk --with pipeline -E deltalake - + + - name: Start SFTP server + run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d + 
+ - name: Configure SSH Agent for sftp tests + run: | + mkdir -p /home/runner/.ssh + cp tests/load/filesystem_sftp/bootstrap/bobby_rsa /home/runner/.ssh/id_rsa + cp tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub /home/runner/.ssh/id_rsa.pub + # always run full suite, also on branches - - run: poetry run pytest tests/load --ignore tests/load/sources -m "not sftp" && poetry run pytest tests/cli - name: Run tests Linux + - name: Run tests Linux + run: | + eval "$(ssh-agent -s)" + poetry run pytest tests/load --ignore tests/load/sources + poetry run pytest tests/cli env: DESTINATION__POSTGRES__CREDENTIALS: postgresql://loader:loader@localhost:5432/dlt_data DESTINATION__QDRANT__CREDENTIALS__location: http://localhost:6333 + DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PORT: 2222 + DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME: foo + DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD: pass - name: Stop weaviate if: always() run: docker compose -f ".github/weaviate-compose.yml" down -v + + - name: Stop SFTP server + if: always() + run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" down -v \ No newline at end of file diff --git a/pytest.ini b/pytest.ini index d6e85f8863..4f033f672c 100644 --- a/pytest.ini +++ b/pytest.ini @@ -12,4 +12,4 @@ markers = essential: marks all essential tests no_load: marks tests that do not load anything needspyarrow17: marks tests that need pyarrow>=17.0.0 (deselected by default) - sftp: marks all sftp tests \ No newline at end of file + \ No newline at end of file diff --git a/tests/.dlt/config.toml b/tests/.dlt/config.toml index 292175569b..62bfbc7680 100644 --- a/tests/.dlt/config.toml +++ b/tests/.dlt/config.toml @@ -10,4 +10,5 @@ bucket_url_abfss="abfss://dlt-ci-test-bucket@dltdata.dfs.core.windows.net" bucket_url_r2="s3://dlt-ci-test-bucket" # use "/" as root path bucket_url_gdrive="gdrive://15eC3e5MNew2XAIefWNlG8VlEa0ISnnaG" +bucket_url_sftp="sftp://localhost/data" memory="memory:///m" \ No newline at end of 
file diff --git a/tests/load/filesystem_sftp/__init__.py b/tests/load/filesystem_sftp/__init__.py index 0d23f8002b..b5aa973d4b 100644 --- a/tests/load/filesystem_sftp/__init__.py +++ b/tests/load/filesystem_sftp/__init__.py @@ -1,3 +1,8 @@ +import pytest from tests.utils import skip_if_not_active +from tests.load.utils import ALL_FILESYSTEM_DRIVERS skip_if_not_active("filesystem") + +if "sftp" not in ALL_FILESYSTEM_DRIVERS: + pytest.skip("sftp filesystem driver not configured", allow_module_level=True) diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index d388952668..33051a1f0a 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -62,7 +62,6 @@ def sftp_filesystem(): yield fs -@pytest.mark.sftp def test_filesystem_sftp_server(sftp_filesystem): test_file = "/data/countries.json" input_data = { @@ -92,7 +91,6 @@ def test_filesystem_sftp_server(sftp_filesystem): fs.rm(test_file) -@pytest.mark.sftp def test_filesystem_sftp_write(sftp_filesystem): import posixpath import pyarrow.parquet as pq @@ -123,7 +121,6 @@ def states(): assert sorted(result_states) == sorted(expected_states) -@pytest.mark.sftp @pytest.mark.parametrize("load_content", (True, False)) @pytest.mark.parametrize("glob_filter", ("**", "**/*.csv", "*.txt", "met_csv/A803/*.csv")) def test_filesystem_sftp_read(load_content: bool, glob_filter: str) -> None: @@ -146,7 +143,6 @@ def test_filesystem_sftp_read(load_content: bool, glob_filter: str) -> None: assert_sample_files(all_file_items, fs, config, load_content, glob_filter) -@pytest.mark.sftp def test_filesystem_sftp_auth_useranme_password(): os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" @@ -160,7 +156,6 @@ def test_filesystem_sftp_auth_useranme_password(): assert len(files) > 0 -@pytest.mark.sftp def 
test_filesystem_sftp_auth_private_key(): os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" @@ -175,7 +170,6 @@ def test_filesystem_sftp_auth_private_key(): assert len(files) > 0 -@pytest.mark.sftp def test_filesystem_sftp_auth_private_key_protected(): os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" @@ -191,11 +185,11 @@ def test_filesystem_sftp_auth_private_key_protected(): assert len(files) > 0 -# Test requires - ssh_agent with user's bobby key loaded. The commands required are: +# Test requires - ssh_agent with user's bobby key loaded. The commands and file names required are: # eval "$(ssh-agent -s)" # cp /path/to/tests/load/filesystem_sftp/bobby_rsa* ~/.ssh/id_rsa # cp /path/to/tests/load/filesystem_sftp/bobby_rsa.pub ~/.ssh/id_rsa.pub -@pytest.mark.sftp +# ssh-add ~/.ssh/id_rsa @pytest.mark.skipif( not is_ssh_agent_ready(), reason="SSH agent is not running or bobby's private key isn't stored in ~/.ssh/id_rsa", @@ -214,7 +208,6 @@ def test_filesystem_sftp_auth_private_ssh_agent(): assert len(files) > 0 -@pytest.mark.sftp def test_filesystem_sftp_auth_ca_signed_pub_key(): os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" diff --git a/tests/load/utils.py b/tests/load/utils.py index 0eaf68d8f8..7827449964 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -75,6 +75,7 @@ GDRIVE_BUCKET = dlt.config.get("tests.bucket_url_gdrive", str) FILE_BUCKET = dlt.config.get("tests.bucket_url_file", str) R2_BUCKET = dlt.config.get("tests.bucket_url_r2", str) +SFTP_BUCKET = dlt.config.get("tests.bucket_url_sftp", str) MEMORY_BUCKET = dlt.config.get("tests.memory", str) ALL_FILESYSTEM_DRIVERS = dlt.config.get("ALL_FILESYSTEM_DRIVERS", list) or [ @@ -86,6 +87,7 @@ "file", "memory", 
"r2", + "sftp", ] # Filter out buckets not in all filesystem drivers @@ -97,6 +99,7 @@ ABFS_BUCKET, AZ_BUCKET, GDRIVE_BUCKET, + SFTP_BUCKET, ] WITH_GDRIVE_BUCKETS = [ bucket From f4a48c84e1dc475d5e544986288943442ecbf302 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 12 Sep 2024 06:06:49 +0200 Subject: [PATCH 34/41] fix: filesystem tests for sftp --- dlt/common/storages/configuration.py | 2 +- .../load/filesystem/test_filesystem_client.py | 6 ++- tests/load/filesystem_sftp/docker-compose.yml | 6 +-- .../filesystem_sftp/test_filesystem_sftp.py | 46 ++++--------------- 4 files changed, 18 insertions(+), 42 deletions(-) diff --git a/dlt/common/storages/configuration.py b/dlt/common/storages/configuration.py index 149e9729c6..4da44bceee 100644 --- a/dlt/common/storages/configuration.py +++ b/dlt/common/storages/configuration.py @@ -59,7 +59,7 @@ class LoadStorageConfiguration(BaseConfiguration): def _make_sftp_url(scheme: str, fs_path: str, bucket_url: str) -> str: parsed_bucket_url = urlparse(bucket_url) - return f"{scheme}://{parsed_bucket_url.hostname}/{fs_path}" + return f"{scheme}://{parsed_bucket_url.hostname}{fs_path}" def _make_az_url(scheme: str, fs_path: str, bucket_url: str) -> str: diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index f16e75c7e6..fdd68d4683 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -194,9 +194,10 @@ def test_replace_write_disposition(layout: str, default_buckets_env: str) -> Non # First file from load1 remains, second file is replaced by load2 # assert that only these two files are in the destination folder + is_sftp = urlparse(default_buckets_env).scheme == "sftp" paths = [] for basedir, _dirs, files in client.fs_client.walk( - client.dataset_path, detail=False, refresh=True + client.dataset_path, detail=False, **({"refresh": True} if not is_sftp else 
{}) ): # remove internal paths if "_dlt" in basedir: @@ -257,9 +258,10 @@ def test_append_write_disposition(layout: str, default_buckets_env: str) -> None ] expected_files = sorted([Path(posixpath.join(root_path, fn)) for fn in expected_files]) # type: ignore[misc] + is_sftp = urlparse(default_buckets_env).scheme == "sftp" paths = [] for basedir, _dirs, files in client.fs_client.walk( - client.dataset_path, detail=False, refresh=True + client.dataset_path, detail=False, **({"refresh": True} if not is_sftp else {}) ): # remove internal paths if "_dlt" in basedir: diff --git a/tests/load/filesystem_sftp/docker-compose.yml b/tests/load/filesystem_sftp/docker-compose.yml index 3421b316ca..a714219146 100644 --- a/tests/load/filesystem_sftp/docker-compose.yml +++ b/tests/load/filesystem_sftp/docker-compose.yml @@ -12,9 +12,9 @@ services: ports: - "2222:22" volumes: - - ../../common/storages/samples:/home/foo/sftp/data/samples - - ../../common/storages/samples:/home/bobby/sftp/data/samples - - ../../common/storages/samples:/home/billy/sftp/data/samples + - ../../common/storages/samples:/home/foo/sftp/data/standard_source/samples + - ../../common/storages/samples:/home/bobby/sftp/data/standard_source/samples + - ../../common/storages/samples:/home/billy/sftp/data/standard_source/samples networks: sftpserver: diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index 33051a1f0a..a47c433749 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -6,11 +6,8 @@ from dlt.common.json import json from dlt.common.configuration.inject import with_config from dlt.common.storages import FilesystemConfiguration, fsspec_from_config -from dlt.common.storages.fsspec_filesystem import glob_files from dlt.destinations.impl.filesystem.filesystem import FilesystemClient -from tests.common.storages.utils import assert_sample_files - 
@with_config(spec=FilesystemConfiguration, sections=("sources", "filesystem")) def get_config(config: FilesystemConfiguration = None) -> FilesystemConfiguration: @@ -121,30 +118,8 @@ def states(): assert sorted(result_states) == sorted(expected_states) -@pytest.mark.parametrize("load_content", (True, False)) -@pytest.mark.parametrize("glob_filter", ("**", "**/*.csv", "*.txt", "met_csv/A803/*.csv")) -def test_filesystem_sftp_read(load_content: bool, glob_filter: str) -> None: - # docker volume mount on: /home/foo/sftp/data/samples but /data/samples is the path in the SFTP server - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path() - - config = get_config() - fs, _ = fsspec_from_config(config) - - files = fs.ls("/data/samples") - - assert len(files) > 0 - # use glob to get data - all_file_items = list(glob_files(fs, config.bucket_url, file_glob=glob_filter)) - - print(all_file_items) - assert_sample_files(all_file_items, fs, config, load_content, glob_filter) - - def test_filesystem_sftp_auth_useranme_password(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/standard_source/samples" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "pass" @@ -152,12 +127,12 @@ def test_filesystem_sftp_auth_useranme_password(): config = get_config() fs, _ = fsspec_from_config(config) - files = fs.ls("/data/samples") + files = fs.ls("/data/standard_source/samples") assert len(files) > 0 def test_filesystem_sftp_auth_private_key(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = 
"sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path() @@ -165,13 +140,12 @@ def test_filesystem_sftp_auth_private_key(): config = get_config() fs, _ = fsspec_from_config(config) - files = fs.ls("/data/samples") - + files = fs.ls("/data/standard_source/samples") assert len(files) > 0 def test_filesystem_sftp_auth_private_key_protected(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "bobby" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path("bobby") @@ -180,7 +154,7 @@ def test_filesystem_sftp_auth_private_key_protected(): config = get_config() fs, _ = fsspec_from_config(config) - files = fs.ls("/data/samples") + files = fs.ls("/data/standard_source/samples") assert len(files) > 0 @@ -195,7 +169,7 @@ def test_filesystem_sftp_auth_private_key_protected(): reason="SSH agent is not running or bobby's private key isn't stored in ~/.ssh/id_rsa", ) def test_filesystem_sftp_auth_private_ssh_agent(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "bobby" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "passphrase123" @@ -203,13 +177,13 @@ def test_filesystem_sftp_auth_private_ssh_agent(): config = get_config() fs, _ = fsspec_from_config(config) - files = fs.ls("/data/samples") + files = 
fs.ls("/data/standard_source/samples") assert len(files) > 0 def test_filesystem_sftp_auth_ca_signed_pub_key(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/samples" + os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "billy" os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path( @@ -219,6 +193,6 @@ def test_filesystem_sftp_auth_ca_signed_pub_key(): config = get_config() fs, _ = fsspec_from_config(config) - files = fs.ls("/data/samples") + files = fs.ls("/data/standard_source/samples") assert len(files) > 0 From a94cb365d7c78c1ee82d2980f4a05765b54d1b5d Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 12 Sep 2024 06:46:39 +0200 Subject: [PATCH 35/41] refactor: reduce redundancy --- .../filesystem_sftp/test_filesystem_sftp.py | 103 +++++++++--------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index a47c433749..a6821722cb 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -88,7 +88,33 @@ def test_filesystem_sftp_server(sftp_filesystem): fs.rm(test_file) -def test_filesystem_sftp_write(sftp_filesystem): +def test_filesystem_sftp_pipeline(sftp_filesystem): + os.environ.update( + { + "DESTINATION__FILESYSTEM__BUCKET_URL": "sftp://localhost/data", + "DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PORT": "2222", + "DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME": "foo", + "DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD": "pass", + } + ) + pipeline = dlt.pipeline(destination="filesystem", dataset_name="test") + pipeline.run( + [ + dlt.resource( + lambda: [{"id": 1, "name": "DE"}, {"id": 2, "name": "AK"}, {"id": 3, "name": 
"CA"}] + ) + ], + loader_file_format="parquet", + ) + client = pipeline.destination_client() # type: ignore[assignment] + data_files = client.fs_client.glob(posixpath.join(client.dataset_path, "states/*")) + assert len(data_files) > 0 + assert sorted( + [r["name"] for r in pq.read_table(sftp_filesystem.open(data_files[0], "rb")).to_pylist()] + ) == ["AK", "CA", "DE"] + + +def test_filesystem_sftp_pipeline(sftp_filesystem): import posixpath import pyarrow.parquet as pq @@ -118,45 +144,37 @@ def states(): assert sorted(result_states) == sorted(expected_states) -def test_filesystem_sftp_auth_useranme_password(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost/data/standard_source/samples" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "pass" - - config = get_config() - fs, _ = fsspec_from_config(config) +def run_sftp_auth(user, password=None, key=None, passphrase=None): + env_vars = { + "SOURCES__FILESYSTEM__BUCKET_URL": "sftp://localhost", + "SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT": "2222", + "SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME": user, + } - files = fs.ls("/data/standard_source/samples") - assert len(files) > 0 + if password: + env_vars["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = password + if key: + env_vars["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path(user) + if passphrase: + env_vars["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_PASSPHRASE"] = passphrase - -def test_filesystem_sftp_auth_private_key(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "foo" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path() + os.environ.update(env_vars) config = 
get_config() fs, _ = fsspec_from_config(config) + assert len(fs.ls("/data/standard_source/samples")) > 0 - files = fs.ls("/data/standard_source/samples") - assert len(files) > 0 +def test_filesystem_sftp_auth_useranme_password(): + run_sftp_auth("foo", "pass") -def test_filesystem_sftp_auth_private_key_protected(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "bobby" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path("bobby") - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_PASSPHRASE"] = "passphrase123" - config = get_config() - fs, _ = fsspec_from_config(config) +def test_filesystem_sftp_auth_private_key(): + run_sftp_auth("foo", key=get_key_path()) - files = fs.ls("/data/standard_source/samples") - assert len(files) > 0 +def test_filesystem_sftp_auth_private_key_protected(): + run_sftp_auth("bobby", key=get_key_path("bobby"), passphrase="passphrase123") # Test requires - ssh_agent with user's bobby key loaded. 
The commands and file names required are: @@ -169,30 +187,9 @@ def test_filesystem_sftp_auth_private_key_protected(): reason="SSH agent is not running or bobby's private key isn't stored in ~/.ssh/id_rsa", ) def test_filesystem_sftp_auth_private_ssh_agent(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "bobby" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD"] = "passphrase123" - - config = get_config() - fs, _ = fsspec_from_config(config) - - files = fs.ls("/data/standard_source/samples") - - assert len(files) > 0 + run_sftp_auth("bobby", passphrase="passphrase123") def test_filesystem_sftp_auth_ca_signed_pub_key(): - os.environ["SOURCES__FILESYSTEM__BUCKET_URL"] = "sftp://localhost" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT"] = "2222" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_USERNAME"] = "billy" - os.environ["SOURCES__FILESYSTEM__CREDENTIALS__SFTP_KEY_FILENAME"] = get_key_path( - "billy" - ) # billy_rsa-cert.pub is automatically loaded too - - config = get_config() - fs, _ = fsspec_from_config(config) - - files = fs.ls("/data/standard_source/samples") - - assert len(files) > 0 + # billy_rsa-cert.pub is automatically loaded too + run_sftp_auth("billy", key=get_key_path("billy")) From b7eb78a11b6e99527188f9b7ad867459959dea11 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 12 Sep 2024 06:54:37 +0200 Subject: [PATCH 36/41] fix: lint and remove duplicated test --- .../filesystem_sftp/test_filesystem_sftp.py | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index a6821722cb..6c226910db 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ 
b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -88,32 +88,6 @@ def test_filesystem_sftp_server(sftp_filesystem): fs.rm(test_file) -def test_filesystem_sftp_pipeline(sftp_filesystem): - os.environ.update( - { - "DESTINATION__FILESYSTEM__BUCKET_URL": "sftp://localhost/data", - "DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PORT": "2222", - "DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME": "foo", - "DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD": "pass", - } - ) - pipeline = dlt.pipeline(destination="filesystem", dataset_name="test") - pipeline.run( - [ - dlt.resource( - lambda: [{"id": 1, "name": "DE"}, {"id": 2, "name": "AK"}, {"id": 3, "name": "CA"}] - ) - ], - loader_file_format="parquet", - ) - client = pipeline.destination_client() # type: ignore[assignment] - data_files = client.fs_client.glob(posixpath.join(client.dataset_path, "states/*")) - assert len(data_files) > 0 - assert sorted( - [r["name"] for r in pq.read_table(sftp_filesystem.open(data_files[0], "rb")).to_pylist()] - ) == ["AK", "CA", "DE"] - - def test_filesystem_sftp_pipeline(sftp_filesystem): import posixpath import pyarrow.parquet as pq From 234c37794296bc7afef676ab9877355c1da49152 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 12 Sep 2024 07:28:53 +0200 Subject: [PATCH 37/41] chore: change ubuntu version --- tests/load/filesystem_sftp/bootstrap/Dockerfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/load/filesystem_sftp/bootstrap/Dockerfile b/tests/load/filesystem_sftp/bootstrap/Dockerfile index 76866c3b9e..a6c75d8c88 100644 --- a/tests/load/filesystem_sftp/bootstrap/Dockerfile +++ b/tests/load/filesystem_sftp/bootstrap/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:latest +FROM ubuntu:noble ENV DEBIAN_FRONTEND=noninteractive @@ -111,9 +111,6 @@ RUN echo "Match User billy" >> /etc/ssh/sshd_config && \ echo " AllowTcpForwarding no" >> /etc/ssh/sshd_config && \ echo " X11Forwarding no" >> 
/etc/ssh/sshd_config - - - EXPOSE 22 # run sshd on container start From 187a55a7173ce0e5ceb4f38127b0910f34630669 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 12 Sep 2024 07:41:57 +0200 Subject: [PATCH 38/41] fix: enforce test marker --- .github/workflows/test_pyarrow17.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_pyarrow17.yml b/.github/workflows/test_pyarrow17.yml index dd48c2af9d..896c7152f7 100644 --- a/.github/workflows/test_pyarrow17.yml +++ b/.github/workflows/test_pyarrow17.yml @@ -72,6 +72,7 @@ jobs: - name: create secrets.toml run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml - - run: | - poetry run pytest tests/libs tests/load -m needspyarrow17 - name: Run needspyarrow17 tests Linux + - name: Run needspyarrow17 tests Linux + run: | + poetry run pytest tests/libs -m "needspyarrow17" + poetry run pytest tests/load -m "needspyarrow17" From 329c2ef6413a7889cf2f8802b522b73e055d9e36 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 12 Sep 2024 07:53:26 +0200 Subject: [PATCH 39/41] fix: ignore sftp tests --- .github/workflows/test_pyarrow17.yml | 1 + tests/load/filesystem_sftp/__init__.py | 5 ----- tests/load/filesystem_sftp/test_filesystem_sftp.py | 5 +++++ 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_pyarrow17.yml b/.github/workflows/test_pyarrow17.yml index 896c7152f7..78d6742ac1 100644 --- a/.github/workflows/test_pyarrow17.yml +++ b/.github/workflows/test_pyarrow17.yml @@ -23,6 +23,7 @@ env: RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }} ACTIVE_DESTINATIONS: "[\"filesystem\"]" + ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\", \"r2\", \"s3\", \"gs\", \"az\", \"abfss\", \"gdrive\"]" #excludes sftp jobs: get_docs_changes: diff --git a/tests/load/filesystem_sftp/__init__.py 
b/tests/load/filesystem_sftp/__init__.py index b5aa973d4b..0d23f8002b 100644 --- a/tests/load/filesystem_sftp/__init__.py +++ b/tests/load/filesystem_sftp/__init__.py @@ -1,8 +1,3 @@ -import pytest from tests.utils import skip_if_not_active -from tests.load.utils import ALL_FILESYSTEM_DRIVERS skip_if_not_active("filesystem") - -if "sftp" not in ALL_FILESYSTEM_DRIVERS: - pytest.skip("sftp filesystem driver not configured", allow_module_level=True) diff --git a/tests/load/filesystem_sftp/test_filesystem_sftp.py b/tests/load/filesystem_sftp/test_filesystem_sftp.py index 6c226910db..32869ee56e 100644 --- a/tests/load/filesystem_sftp/test_filesystem_sftp.py +++ b/tests/load/filesystem_sftp/test_filesystem_sftp.py @@ -8,6 +8,11 @@ from dlt.common.storages import FilesystemConfiguration, fsspec_from_config from dlt.destinations.impl.filesystem.filesystem import FilesystemClient +from tests.load.utils import ALL_FILESYSTEM_DRIVERS + +if "sftp" not in ALL_FILESYSTEM_DRIVERS: + pytest.skip("sftp filesystem driver not configured", allow_module_level=True) + @with_config(spec=FilesystemConfiguration, sections=("sources", "filesystem")) def get_config(config: FilesystemConfiguration = None) -> FilesystemConfiguration: From 964db4d684b9a95c52d0934aff0dba8a797a1ff5 Mon Sep 17 00:00:00 2001 From: Julian Alves <28436330+donotpush@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:53:46 +0200 Subject: [PATCH 40/41] fix: exclude sftp from filesystem tests --- .github/workflows/test_destinations.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test_destinations.yml b/.github/workflows/test_destinations.yml index fc7eeadfe2..ada73b85d9 100644 --- a/.github/workflows/test_destinations.yml +++ b/.github/workflows/test_destinations.yml @@ -30,6 +30,7 @@ env: # postgres runs again here so we can test on mac/windows ACTIVE_DESTINATIONS: "[\"redshift\", \"postgres\", \"duckdb\", \"filesystem\", \"dummy\"]" # note that all buckets are enabled for testing + 
ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\", \"r2\", \"s3\", \"gs\", \"az\", \"abfss\", \"gdrive\"]" #excludes sftp jobs: get_docs_changes: From 7a43d3769b7d62dac6810d806e109d9eba77216c Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 14 Sep 2024 10:32:39 +0200 Subject: [PATCH 41/41] adds sftp extra dep --- .../dlt-ecosystem/destinations/filesystem.md | 2 +- poetry.lock | 122 ++---------------- 2 files changed, 13 insertions(+), 111 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md index 94ad67ba99..888cae7a35 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md @@ -269,7 +269,7 @@ bucket_url='\\?\UNC\localhost\c$\a\b\c' ::: ### SFTP -Run `pip install "dlt[paramiko]` which will install the `paramiko` package alongside `dlt`, enabling secure SFTP transfers. +Run `pip install "dlt[sftp]` which will install the `paramiko` package alongside `dlt`, enabling secure SFTP transfers. Configure your SFTP credentials by editing the `.dlt/secrets.toml` file. By default, the file contains placeholders for AWS credentials. You should replace these with your SFTP credentials. diff --git a/poetry.lock b/poetry.lock index e845e5e672..8f2ff58094 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "about-time" @@ -216,13 +216,13 @@ frozenlist = ">=1.1.0" [[package]] name = "alembic" -version = "1.12.0" +version = "1.13.2" description = "A database migration tool for SQLAlchemy." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "alembic-1.12.0-py3-none-any.whl", hash = "sha256:03226222f1cf943deee6c85d9464261a6c710cd19b4fe867a3ad1f25afda610f"}, - {file = "alembic-1.12.0.tar.gz", hash = "sha256:8e7645c32e4f200675e69f0745415335eb59a3663f5feb487abfa0b30c45888b"}, + {file = "alembic-1.13.2-py3-none-any.whl", hash = "sha256:6b8733129a6224a9a711e17c99b08462dbf7cc9670ba8f2e2ae9af860ceb1953"}, + {file = "alembic-1.13.2.tar.gz", hash = "sha256:1ff0ae32975f4fd96028c39ed9bb3c867fe3af956bd7bb37343b54c9fe7445ef"}, ] [package.dependencies] @@ -233,7 +233,7 @@ SQLAlchemy = ">=1.3.0" typing-extensions = ">=4" [package.extras] -tz = ["python-dateutil"] +tz = ["backports.zoneinfo"] [[package]] name = "alive-progress" @@ -927,7 +927,7 @@ yaml = ["PyYAML"] name = "bcrypt" version = "4.2.0" description = "Modern password hashing for your software and your servers" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "bcrypt-4.2.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:096a15d26ed6ce37a14c1ac1e48119660f21b24cba457f160a4b830f3fe6b5cb"}, @@ -3789,106 +3789,6 @@ files = [ {file = "google_re2-1.1-4-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f4d4f0823e8b2f6952a145295b1ff25245ce9bb136aff6fe86452e507d4c1dd"}, {file = "google_re2-1.1-4-cp39-cp39-win32.whl", hash = "sha256:1afae56b2a07bb48cfcfefaa15ed85bae26a68f5dc7f9e128e6e6ea36914e847"}, {file = "google_re2-1.1-4-cp39-cp39-win_amd64.whl", hash = "sha256:aa7d6d05911ab9c8adbf3c225a7a120ab50fd2784ac48f2f0d140c0b7afc2b55"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:222fc2ee0e40522de0b21ad3bc90ab8983be3bf3cec3d349c80d76c8bb1a4beb"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d4763b0b9195b72132a4e7de8e5a9bf1f05542f442a9115aa27cfc2a8004f581"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_arm64.whl", hash = 
"sha256:209649da10c9d4a93d8a4d100ecbf9cc3b0252169426bec3e8b4ad7e57d600cf"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:68813aa333c1604a2df4a495b2a6ed065d7c8aebf26cc7e7abb5a6835d08353c"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:370a23ec775ad14e9d1e71474d56f381224dcf3e72b15d8ca7b4ad7dd9cd5853"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:14664a66a3ddf6bc9e56f401bf029db2d169982c53eff3f5876399104df0e9a6"}, - {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea3722cc4932cbcebd553b69dce1b4a73572823cff4e6a244f1c855da21d511"}, - {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e14bb264c40fd7c627ef5678e295370cd6ba95ca71d835798b6e37502fc4c690"}, - {file = "google_re2-1.1-5-cp310-cp310-win32.whl", hash = "sha256:39512cd0151ea4b3969c992579c79b423018b464624ae955be685fc07d94556c"}, - {file = "google_re2-1.1-5-cp310-cp310-win_amd64.whl", hash = "sha256:ac66537aa3bc5504320d922b73156909e3c2b6da19739c866502f7827b3f9fdf"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5b5ea68d54890c9edb1b930dcb2658819354e5d3f2201f811798bbc0a142c2b4"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:33443511b6b83c35242370908efe2e8e1e7cae749c766b2b247bf30e8616066c"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:413d77bdd5ba0bfcada428b4c146e87707452ec50a4091ec8e8ba1413d7e0619"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:5171686e43304996a34baa2abcee6f28b169806d0e583c16d55e5656b092a414"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b284db130283771558e31a02d8eb8fb756156ab98ce80035ae2e9e3a5f307c4"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_14_0_x86_64.whl", hash = 
"sha256:296e6aed0b169648dc4b870ff47bd34c702a32600adb9926154569ef51033f47"}, - {file = "google_re2-1.1-5-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38d50e68ead374160b1e656bbb5d101f0b95fb4cc57f4a5c12100155001480c5"}, - {file = "google_re2-1.1-5-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a0416a35921e5041758948bcb882456916f22845f66a93bc25070ef7262b72a"}, - {file = "google_re2-1.1-5-cp311-cp311-win32.whl", hash = "sha256:a1d59568bbb5de5dd56dd6cdc79907db26cce63eb4429260300c65f43469e3e7"}, - {file = "google_re2-1.1-5-cp311-cp311-win_amd64.whl", hash = "sha256:72f5a2f179648b8358737b2b493549370debd7d389884a54d331619b285514e3"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:cbc72c45937b1dc5acac3560eb1720007dccca7c9879138ff874c7f6baf96005"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5fadd1417fbef7235fa9453dba4eb102e6e7d94b1e4c99d5fa3dd4e288d0d2ae"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:040f85c63cc02696485b59b187a5ef044abe2f99b92b4fb399de40b7d2904ccc"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:64e3b975ee6d9bbb2420494e41f929c1a0de4bcc16d86619ab7a87f6ea80d6bd"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8ee370413e00f4d828eaed0e83b8af84d7a72e8ee4f4bd5d3078bc741dfc430a"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:5b89383001079323f693ba592d7aad789d7a02e75adb5d3368d92b300f5963fd"}, - {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:63cb4fdfbbda16ae31b41a6388ea621510db82feb8217a74bf36552ecfcd50ad"}, - {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ebedd84ae8be10b7a71a16162376fd67a2386fe6361ef88c622dcf7fd679daf"}, - {file = "google_re2-1.1-5-cp312-cp312-win32.whl", hash = 
"sha256:c8e22d1692bc2c81173330c721aff53e47ffd3c4403ff0cd9d91adfd255dd150"}, - {file = "google_re2-1.1-5-cp312-cp312-win_amd64.whl", hash = "sha256:5197a6af438bb8c4abda0bbe9c4fbd6c27c159855b211098b29d51b73e4cbcf6"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b6727e0b98417e114b92688ad2aa256102ece51f29b743db3d831df53faf1ce3"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:711e2b6417eb579c61a4951029d844f6b95b9b373b213232efd413659889a363"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:71ae8b3df22c5c154c8af0f0e99d234a450ef1644393bc2d7f53fc8c0a1e111c"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:94a04e214bc521a3807c217d50cf099bbdd0c0a80d2d996c0741dbb995b5f49f"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:a770f75358508a9110c81a1257721f70c15d9bb592a2fb5c25ecbd13566e52a5"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:07c9133357f7e0b17c6694d5dcb82e0371f695d7c25faef2ff8117ef375343ff"}, - {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:204ca6b1cf2021548f4a9c29ac015e0a4ab0a7b6582bf2183d838132b60c8fda"}, - {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f0b95857c2c654f419ca684ec38c9c3325c24e6ba7d11910a5110775a557bb18"}, - {file = "google_re2-1.1-5-cp38-cp38-win32.whl", hash = "sha256:347ac770e091a0364e822220f8d26ab53e6fdcdeaec635052000845c5a3fb869"}, - {file = "google_re2-1.1-5-cp38-cp38-win_amd64.whl", hash = "sha256:ec32bb6de7ffb112a07d210cf9f797b7600645c2d5910703fa07f456dd2150e0"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb5adf89060f81c5ff26c28e261e6b4997530a923a6093c9726b8dec02a9a326"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a22630c9dd9ceb41ca4316bccba2643a8b1d5c198f21c00ed5b50a94313aaf10"}, - 
{file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:544dc17fcc2d43ec05f317366375796351dec44058e1164e03c3f7d050284d58"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:19710af5ea88751c7768575b23765ce0dfef7324d2539de576f75cdc319d6654"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:f82995a205e08ad896f4bd5ce4847c834fab877e1772a44e5f262a647d8a1dec"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:63533c4d58da9dc4bc040250f1f52b089911699f0368e0e6e15f996387a984ed"}, - {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79e00fcf0cb04ea35a22b9014712d448725ce4ddc9f08cc818322566176ca4b0"}, - {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc41afcefee2da6c4ed883a93d7f527c4b960cd1d26bbb0020a7b8c2d341a60a"}, - {file = "google_re2-1.1-5-cp39-cp39-win32.whl", hash = "sha256:486730b5e1f1c31b0abc6d80abe174ce4f1188fe17d1b50698f2bf79dc6e44be"}, - {file = "google_re2-1.1-5-cp39-cp39-win_amd64.whl", hash = "sha256:4de637ca328f1d23209e80967d1b987d6b352cd01b3a52a84b4d742c69c3da6c"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:621e9c199d1ff0fdb2a068ad450111a84b3bf14f96dfe5a8a7a0deae5f3f4cce"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:220acd31e7dde95373f97c3d1f3b3bd2532b38936af28b1917ee265d25bebbf4"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:db34e1098d164f76251a6ece30e8f0ddfd65bb658619f48613ce71acb3f9cbdb"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:5152bac41d8073977582f06257219541d0fc46ad99b0bbf30e8f60198a43b08c"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:6191294799e373ee1735af91f55abd23b786bdfd270768a690d9d55af9ea1b0d"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_14_0_x86_64.whl", 
hash = "sha256:070cbafbb4fecbb02e98feb28a1eb292fb880f434d531f38cc33ee314b521f1f"}, - {file = "google_re2-1.1-6-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8437d078b405a59a576cbed544490fe041140f64411f2d91012e8ec05ab8bf86"}, - {file = "google_re2-1.1-6-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f00f9a9af8896040e37896d9b9fc409ad4979f1ddd85bb188694a7d95ddd1164"}, - {file = "google_re2-1.1-6-cp310-cp310-win32.whl", hash = "sha256:df26345f229a898b4fd3cafd5f82259869388cee6268fc35af16a8e2293dd4e5"}, - {file = "google_re2-1.1-6-cp310-cp310-win_amd64.whl", hash = "sha256:3665d08262c57c9b28a5bdeb88632ad792c4e5f417e5645901695ab2624f5059"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b26b869d8aa1d8fe67c42836bf3416bb72f444528ee2431cfb59c0d3e02c6ce3"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:41fd4486c57dea4f222a6bb7f1ff79accf76676a73bdb8da0fcbd5ba73f8da71"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:0ee378e2e74e25960070c338c28192377c4dd41e7f4608f2688064bd2badc41e"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:a00cdbf662693367b36d075b29feb649fd7ee1b617cf84f85f2deebeda25fc64"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4c09455014217a41499432b8c8f792f25f3df0ea2982203c3a8c8ca0e7895e69"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6501717909185327935c7945e23bb5aa8fc7b6f237b45fe3647fa36148662158"}, - {file = "google_re2-1.1-6-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3510b04790355f199e7861c29234081900e1e1cbf2d1484da48aa0ba6d7356ab"}, - {file = "google_re2-1.1-6-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8c0e64c187ca406764f9e9ad6e750d62e69ed8f75bf2e865d0bfbc03b642361c"}, - {file = "google_re2-1.1-6-cp311-cp311-win32.whl", hash = 
"sha256:2a199132350542b0de0f31acbb3ca87c3a90895d1d6e5235f7792bb0af02e523"}, - {file = "google_re2-1.1-6-cp311-cp311-win_amd64.whl", hash = "sha256:83bdac8ceaece8a6db082ea3a8ba6a99a2a1ee7e9f01a9d6d50f79c6f251a01d"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:81985ff894cd45ab5a73025922ac28c0707759db8171dd2f2cc7a0e856b6b5ad"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5635af26065e6b45456ccbea08674ae2ab62494008d9202df628df3b267bc095"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:813b6f04de79f4a8fdfe05e2cb33e0ccb40fe75d30ba441d519168f9d958bd54"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:5ec2f5332ad4fd232c3f2d6748c2c7845ccb66156a87df73abcc07f895d62ead"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5a687b3b32a6cbb731647393b7c4e3fde244aa557f647df124ff83fb9b93e170"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:39a62f9b3db5d3021a09a47f5b91708b64a0580193e5352751eb0c689e4ad3d7"}, - {file = "google_re2-1.1-6-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ca0f0b45d4a1709cbf5d21f355e5809ac238f1ee594625a1e5ffa9ff7a09eb2b"}, - {file = "google_re2-1.1-6-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a64b3796a7a616c7861247bd061c9a836b5caf0d5963e5ea8022125601cf7b09"}, - {file = "google_re2-1.1-6-cp312-cp312-win32.whl", hash = "sha256:32783b9cb88469ba4cd9472d459fe4865280a6b1acdad4480a7b5081144c4eb7"}, - {file = "google_re2-1.1-6-cp312-cp312-win_amd64.whl", hash = "sha256:259ff3fd2d39035b9cbcbf375995f83fa5d9e6a0c5b94406ff1cc168ed41d6c6"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:e4711bcffe190acd29104d8ecfea0c0e42b754837de3fb8aad96e6cc3c613cdc"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_12_0_x86_64.whl", hash = 
"sha256:4d081cce43f39c2e813fe5990e1e378cbdb579d3f66ded5bade96130269ffd75"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:4f123b54d48450d2d6b14d8fad38e930fb65b5b84f1b022c10f2913bd956f5b5"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:e1928b304a2b591a28eb3175f9db7f17c40c12cf2d4ec2a85fdf1cc9c073ff91"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:3a69f76146166aec1173003c1f547931bdf288c6b135fda0020468492ac4149f"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:fc08c388f4ebbbca345e84a0c56362180d33d11cbe9ccfae663e4db88e13751e"}, - {file = "google_re2-1.1-6-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b057adf38ce4e616486922f2f47fc7d19c827ba0a7f69d540a3664eba2269325"}, - {file = "google_re2-1.1-6-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4138c0b933ab099e96f5d8defce4486f7dfd480ecaf7f221f2409f28022ccbc5"}, - {file = "google_re2-1.1-6-cp38-cp38-win32.whl", hash = "sha256:9693e45b37b504634b1abbf1ee979471ac6a70a0035954592af616306ab05dd6"}, - {file = "google_re2-1.1-6-cp38-cp38-win_amd64.whl", hash = "sha256:5674d437baba0ea287a5a7f8f81f24265d6ae8f8c09384e2ef7b6f84b40a7826"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7783137cb2e04f458a530c6d0ee9ef114815c1d48b9102f023998c371a3b060e"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a49b7153935e7a303675f4deb5f5d02ab1305adefc436071348706d147c889e0"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:a96a8bb309182090704593c60bdb369a2756b38fe358bbf0d40ddeb99c71769f"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:dff3d4be9f27ef8ec3705eed54f19ef4ab096f5876c15fe011628c69ba3b561c"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:40f818b0b39e26811fa677978112a8108269977fdab2ba0453ac4363c35d9e66"}, 
- {file = "google_re2-1.1-6-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:8a7e53538cdb40ef4296017acfbb05cab0c19998be7552db1cfb85ba40b171b9"}, - {file = "google_re2-1.1-6-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ee18e7569fb714e5bb8c42809bf8160738637a5e71ed5a4797757a1fb4dc4de"}, - {file = "google_re2-1.1-6-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1cda4f6d1a7d5b43ea92bc395f23853fba0caf8b1e1efa6e8c48685f912fcb89"}, - {file = "google_re2-1.1-6-cp39-cp39-win32.whl", hash = "sha256:6a9cdbdc36a2bf24f897be6a6c85125876dc26fea9eb4247234aec0decbdccfd"}, - {file = "google_re2-1.1-6-cp39-cp39-win_amd64.whl", hash = "sha256:73f646cecfad7cc5b4330b4192c25f2e29730a3b8408e089ffd2078094208196"}, ] [[package]] @@ -6323,7 +6223,7 @@ xml = ["lxml (>=4.8.0)"] name = "paramiko" version = "3.4.1" description = "SSH2 protocol library" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "paramiko-3.4.1-py3-none-any.whl", hash = "sha256:8e49fd2f82f84acf7ffd57c64311aa2b30e575370dc23bdb375b10262f7eac32"}, @@ -7235,7 +7135,7 @@ rsa = ["cryptography"] name = "pynacl" version = "1.5.0" description = "Python binding to the Networking and Cryptography (NaCl) library" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"}, @@ -9849,12 +9749,14 @@ postgres = ["psycopg2-binary", "psycopg2cffi"] qdrant = ["qdrant-client"] redshift = ["psycopg2-binary", "psycopg2cffi"] s3 = ["botocore", "s3fs"] +sftp = ["paramiko"] snowflake = ["snowflake-connector-python"] sql-database = ["sqlalchemy"] +sqlalchemy = ["alembic", "sqlalchemy"] synapse = ["adlfs", "pyarrow", "pyodbc"] weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "1932fafe0cb36b059cb9ae464ebcd7e6e5b8b4f8eb2d1f014d33338cae939748" 
+content-hash = "cf2b7cd45b7127328b25128320607b25a2c3b869f2ee6f79412fa12dc56441eb"