diff --git a/.github/actions/setup-hadoop/action.yaml b/.github/actions/setup-hadoop/action.yaml
new file mode 100644
index 000000000000..835371e42c5c
--- /dev/null
+++ b/.github/actions/setup-hadoop/action.yaml
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Setup Hadoop
+description: 'Prepare hadoop binary and env'
+
+inputs:
+  need-hadoop:
+    description: "Whether this setup needs a hadoop binary"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Setup java env
+      uses: actions/setup-java@v4
+      with:
+        distribution: temurin
+        java-version: "11"
+
+    - name: Cache hadoop
+      id: cache-hadoop
+      uses: actions/cache@v4
+      if: inputs.need-hadoop == 'true'
+      with:
+        path: /home/runner/hadoop-3.3.5
+        key: cache-hadoop-3.3.5
+
+    - name: Download hadoop if not cached
+      if: steps.cache-hadoop.outputs.cache-hit != 'true' && inputs.need-hadoop == 'true'
+      shell: bash
+      run: |
+        set -e
+        curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner
+
+    - name: Setup hadoop env
+      shell: bash
+      run: |
+        export HADOOP_HOME=/home/runner/hadoop-3.3.5
+        echo "HADOOP_HOME=${HADOOP_HOME}" >> $GITHUB_ENV
+        echo "CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)" >> $GITHUB_ENV
+        echo "LD_LIBRARY_PATH=${{ env.JAVA_HOME }}/lib/server:${HADOOP_HOME}/lib/native" >> $GITHUB_ENV
+        cp ${{ github.workspace }}/fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml
+
+
+
diff --git a/.github/scripts/test_go_binding/matrix.yaml b/.github/scripts/test_go_binding/matrix.yaml
index e477aea073ef..b64fa19b6a21 100644
--- a/.github/scripts/test_go_binding/matrix.yaml
+++ b/.github/scripts/test_go_binding/matrix.yaml
@@ -15,12 +15,17 @@
 # specific language governing permissions and limitations
 # under the License.
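+# Each build entry below pairs a Rust target triple with the Go platform
+# (goos/goarch) and the runner OS that consume the produced C library.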
-os: ["ubuntu-latest"] build: - target: "x86_64-unknown-linux-gnu" cc: "gcc" goos: "linux" goarch: "amd64" + os: "ubuntu-latest" + - target: "aarch64-apple-darwin" + cc: "clang" + goos: "darwin" + goarch: "arm64" + os: "macos-latest" service: - "fs" diff --git a/.github/services/hdfs/hdfs_cluster/action.yml b/.github/services/hdfs/hdfs_cluster/action.yml index f54624264b0a..2047c65ff32c 100644 --- a/.github/services/hdfs/hdfs_cluster/action.yml +++ b/.github/services/hdfs/hdfs_cluster/action.yml @@ -25,20 +25,10 @@ runs: shell: bash working-directory: fixtures/hdfs run: docker compose -f docker-compose-hdfs-cluster.yml up -d --wait - - name: Setup java env - uses: actions/setup-java@v4 + - name: Setup hadoop + uses: ./.github/actions/setup-hadoop with: - distribution: temurin - java-version: "11" - - name: Setup hadoop env - shell: bash - run: | - curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner - export HADOOP_HOME=/home/runner/hadoop-3.3.5 - echo "HADOOP_HOME=${HADOOP_HOME}" >> $GITHUB_ENV - echo "CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)" >> $GITHUB_ENV - echo "LD_LIBRARY_PATH=${{ env.JAVA_HOME }}/lib/server:${HADOOP_HOME}/lib/native" >> $GITHUB_ENV - cp ${{ github.workspace }}/fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml + need-hadoop: true - name: Setup opendal env shell: bash run: | diff --git a/.github/services/hdfs/hdfs_cluster_with_atomic_write_dir/action.yml b/.github/services/hdfs/hdfs_cluster_with_atomic_write_dir/action.yml index 860b6137a147..6ad8894b0a30 100644 --- a/.github/services/hdfs/hdfs_cluster_with_atomic_write_dir/action.yml +++ b/.github/services/hdfs/hdfs_cluster_with_atomic_write_dir/action.yml @@ -25,20 +25,10 @@ runs: shell: bash working-directory: fixtures/hdfs run: docker compose -f docker-compose-hdfs-cluster.yml up -d --wait - - name: Setup java env - uses: actions/setup-java@v4 + - name: Setup hadoop + uses: ./.github/actions/setup-hadoop with: - distribution: temurin - java-version: "11" - - name: Setup hadoop env - shell: bash - run: | - curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner - export HADOOP_HOME=/home/runner/hadoop-3.3.5 - echo "HADOOP_HOME=${HADOOP_HOME}" >> $GITHUB_ENV - echo "CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)" >> $GITHUB_ENV - echo "LD_LIBRARY_PATH=${{ env.JAVA_HOME }}/lib/server:${HADOOP_HOME}/lib/native" >> $GITHUB_ENV - cp ${{ github.workspace }}/fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml + need-hadoop: true - name: Setup opendal env shell: bash run: | diff --git a/.github/services/hdfs/hdfs_default/action.yml b/.github/services/hdfs/hdfs_default/action.yml index d4bdce0765d1..a6e2540dd76a 100644 --- a/.github/services/hdfs/hdfs_default/action.yml +++ b/.github/services/hdfs/hdfs_default/action.yml @@ -21,25 +21,14 @@ description: 'Behavior test for hdfs default' runs: using: "composite" steps: - - name: Setup java env - uses: actions/setup-java@v4 + - name: Setup hadoop + uses: ./.github/actions/setup-hadoop with: - distribution: temurin - java-version: "11" - - name: Setup + need-hadoop: true + - name: Setup OpenDAL env shell: bash run: | - curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner - - export HADOOP_HOME="/home/runner/hadoop-3.3.5" - export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) - - cp ./fixtures/hdfs/hdfs-site.xml 
${HADOOP_HOME}/etc/hadoop/hdfs-site.xml - cat << EOF >> $GITHUB_ENV - HADOOP_HOME=${HADOOP_HOME} - CLASSPATH=${CLASSPATH} - LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${HADOOP_HOME}/lib/native OPENDAL_HDFS_ROOT=/tmp/opendal/ OPENDAL_HDFS_NAME_NODE=default OPENDAL_HDFS_ENABLE_APPEND=false diff --git a/.github/services/hdfs/hdfs_default_with_atomic_write_dir/action.yml b/.github/services/hdfs/hdfs_default_with_atomic_write_dir/action.yml index b8de8671611a..57372570ff30 100644 --- a/.github/services/hdfs/hdfs_default_with_atomic_write_dir/action.yml +++ b/.github/services/hdfs/hdfs_default_with_atomic_write_dir/action.yml @@ -21,25 +21,14 @@ description: 'Behavior test for hdfs default with atomic write dir' runs: using: "composite" steps: - - name: Setup java env - uses: actions/setup-java@v4 + - name: Setup hadoop + uses: ./.github/actions/setup-hadoop with: - distribution: temurin - java-version: "11" - - name: Setup + need-hadoop: true + - name: Setup OpenDAL env shell: bash run: | - curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner - - export HADOOP_HOME="/home/runner/hadoop-3.3.5" - export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) - - cp ./fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml - cat << EOF >> $GITHUB_ENV - HADOOP_HOME=${HADOOP_HOME} - CLASSPATH=${CLASSPATH} - LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${HADOOP_HOME}/lib/native OPENDAL_HDFS_ROOT=/tmp/opendal/ OPENDAL_HDFS_ATOMIC_WRITE_DIR=/tmp/atomic_write_dir/opendal/ OPENDAL_HDFS_NAME_NODE=default diff --git a/.github/services/s3/ceph_radios_s3_with_versioning/disable_action.yml b/.github/services/s3/ceph_radios_s3_with_versioning/disable_action.yml index 7838f5f52023..71b550d91e25 100644 --- a/.github/services/s3/ceph_radios_s3_with_versioning/disable_action.yml +++ b/.github/services/s3/ceph_radios_s3_with_versioning/disable_action.yml @@ -44,4 +44,5 @@ runs: OPENDAL_S3_SECRET_ACCESS_KEY=demo OPENDAL_S3_REGION=us-east-1 OPENDAL_S3_ENABLE_VERSIONING=true + OPENDAL_S3_DISABLE_WRITE_WITH_IF_MATCH=on EOF diff --git a/.github/services/s3/ceph_rados_s3/action.yml b/.github/services/s3/ceph_rados_s3/action.yml index dfb4b5ad6a6f..54e6e7049ddb 100644 --- a/.github/services/s3/ceph_rados_s3/action.yml +++ b/.github/services/s3/ceph_rados_s3/action.yml @@ -41,4 +41,5 @@ runs: OPENDAL_S3_ACCESS_KEY_ID=demo OPENDAL_S3_SECRET_ACCESS_KEY=demo OPENDAL_S3_REGION=us-east-1 + OPENDAL_S3_DISABLE_WRITE_WITH_IF_MATCH=on EOF diff --git a/.github/workflows/ci_bin_oli.yml b/.github/workflows/ci_bin_oli.yml index f6877485f785..fc146eea8db8 100644 --- a/.github/workflows/ci_bin_oli.yml +++ b/.github/workflows/ci_bin_oli.yml @@ -25,7 +25,7 @@ on: branches: - main paths: - - "bin/oay/**" + - "bin/oli/**" - "core/**" - ".github/workflows/ci_bin_oli.yml" diff --git a/.github/workflows/ci_bindings_cpp.yml b/.github/workflows/ci_bindings_cpp.yml index 9c1376efac6b..9f1fafa703b4 100644 --- a/.github/workflows/ci_bindings_cpp.yml +++ b/.github/workflows/ci_bindings_cpp.yml @@ -41,13 +41,13 @@ permissions: jobs: test: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 - name: Install dependencies run: | sudo apt-get update - sudo apt-get install libgtest-dev ninja-build libboost-all-dev valgrind doxygen + sudo apt-get install ninja-build valgrind doxygen - name: Setup Rust toolchain uses: ./.github/actions/setup @@ -69,3 +69,12 @@ jobs: cmake -GNinja -DOPENDAL_ENABLE_TESTING=ON .. 
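+          # build the test binary with ninja, then run it under valgrind so
+          # leaks in the C++ wrapper or the Rust core surface as CI failures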
ninja valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose ./opendal_cpp_test + + - name: Build Cpp binding with async && Run tests + working-directory: "bindings/cpp" + run: | + mkdir build-async + cd build-async + cmake -GNinja -DOPENDAL_DEV=ON -DOPENDAL_ENABLE_ASYNC=ON -DCMAKE_CXX_COMPILER=clang++-18 .. + ninja + ./opendal_cpp_test diff --git a/.github/workflows/ci_bindings_go.yml b/.github/workflows/ci_bindings_go.yml index e4c4df73653a..2264e18fa9e6 100644 --- a/.github/workflows/ci_bindings_go.yml +++ b/.github/workflows/ci_bindings_go.yml @@ -70,7 +70,7 @@ jobs: echo "matrix=$MATRIX" >> $GITHUB_OUTPUT test: needs: [ matrix ] - runs-on: ${{ matrix.os }} + runs-on: ${{ matrix.build.os }} strategy: fail-fast: false matrix: ${{ fromJson(needs.matrix.outputs.matrix) }} @@ -98,6 +98,12 @@ jobs: run: | python -m pip install toml python tools/.github/scripts/setup_features.py + - name: Install dependencies (Linux) + if: ${{ matrix.build.os == 'ubuntu-latest' }} + run: sudo apt install zstd + - name: Install dependencies (macOS) + if: ${{ matrix.build.os == 'macos-latest' }} + run: brew install zstd libffi - name: Build C Binding working-directory: bindings/c env: @@ -105,16 +111,21 @@ jobs: SERVICE: ${{ matrix.service }} TARGET: ${{ matrix.build.target }} CC: ${{ matrix.build.cc }} + OS: ${{ matrix.build.os }} run: | cargo build --target $TARGET --release - sudo apt install zstd DIR=$GITHUB_WORKSPACE/libopendal_c_${VERSION}_${SERVICE}_$TARGET mkdir $DIR - zstd -22 ./target/$TARGET/release/libopendal_c.so -o $DIR/libopendal_c.$TARGET.so.zst + if [ ${OS} == 'ubuntu-latest' ]; then + SO=so + else + SO=dylib + fi + zstd -19 ./target/$TARGET/release/libopendal_c.$SO -o $DIR/libopendal_c.$TARGET.$SO.zst - name: Build Go Artifact working-directory: tools/internal/generate env: - MATRIX: ${{ needs.matrix.outputs.matrix }} + MATRIX: '{"build": [${{ toJson(matrix.build) }}], "service": ["${{ matrix.service }}"]}' VERSION: "latest" run: | go run generate.go @@ -132,4 +143,8 @@ jobs: OPENDAL_TEST: ${{ matrix.service }} OPENDAL_FS_ROOT: "/tmp/opendal/" working-directory: bindings/go/tests/behavior_tests - run: CGO_ENABLE=0 go test -v -run TestBehavior + run: | + if [ ${{ matrix.build.os }} == 'macos-latest' ]; then + export DYLD_FALLBACK_LIBRARY_PATH=$DYLD_FALLBACK_LIBRARY_PATH:/opt/homebrew/opt/libffi/lib + fi + CGO_ENABLE=0 go test -v -run TestBehavior diff --git a/.github/workflows/ci_bindings_nodejs.yml b/.github/workflows/ci_bindings_nodejs.yml index ded2005c39ed..eb60689f623c 100644 --- a/.github/workflows/ci_bindings_nodejs.yml +++ b/.github/workflows/ci_bindings_nodejs.yml @@ -153,7 +153,7 @@ jobs: shell: bash working-directory: . 
run: ${{ matrix.settings.build }} - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: bindings-linux-${{ matrix.settings.target }} path: bindings/nodejs/*.node @@ -201,7 +201,7 @@ jobs: - name: Build shell: bash run: ${{ matrix.settings.build }} - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: bindings-windows-${{ matrix.settings.target }} path: bindings/nodejs/*.node @@ -223,7 +223,7 @@ jobs: build: | rustup target add x86_64-apple-darwin; export NAPI_TARGET=x86_64-apple-darwin; - + pnpm build strip -x *.node - target: aarch64-apple-darwin @@ -291,7 +291,7 @@ jobs: run: pnpm install --frozen-lockfile - name: Download all artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: bindings/nodejs/artifacts - name: Move artifacts diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2fb8660c0fcb..002a9f28ea75 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -73,7 +73,7 @@ jobs: LD_LIBRARY_PATH: ${{ env.JAVA_HOME }}/lib/server:${{ env.LD_LIBRARY_PATH }} - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: rust-docs path: ./core/target/doc @@ -94,7 +94,7 @@ jobs: run: mvn javadoc:javadoc - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: java-docs path: ./bindings/java/target/site/apidocs @@ -131,7 +131,7 @@ jobs: run: pnpm run docs - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: nodejs-docs path: ./bindings/nodejs/docs @@ -158,7 +158,7 @@ jobs: run: pdoc -t ./template --output-dir ./docs opendal - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: python-docs path: ./bindings/python/docs @@ -182,7 +182,7 @@ jobs: run: make doc - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: C-docs path: ./bindings/c/docs/doxygen/html @@ -201,7 +201,7 @@ jobs: run: ldoc ./src - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: lua-docs path: ./bindings/lua/doc/ @@ -241,7 +241,7 @@ jobs: find dist-newstyle -path '**/build/**/doc' -exec cp -r {}/html/opendal/ doc \; - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: haskell-docs path: ./bindings/haskell/doc/ @@ -270,7 +270,7 @@ jobs: ninja docs - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: cpp-docs path: ./bindings/cpp/build/docs_doxygen/html @@ -315,7 +315,7 @@ jobs: dune build @doc - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: ocaml-docs path: ./bindings/ocaml/_build/default/_doc/_html @@ -338,7 +338,7 @@ jobs: run: cargo +${{ env.RUST_DOC_TOOLCHAIN }} doc --lib --no-deps --all-features - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: object-store-opendal-docs path: ./integrations/object_store/target/doc @@ -362,7 +362,7 @@ jobs: run: cargo +${{ env.RUST_DOC_TOOLCHAIN }} doc --lib --no-deps --all-features - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: dav-server-opendalfs-docs path: ./integrations/dav-server/target/doc @@ -385,7 +385,7 @@ jobs: run: cargo +${{ env.RUST_DOC_TOOLCHAIN }} doc --lib --no-deps --all-features - name: Upload docs 
- uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: fuse3-opendal-docs path: ./integrations/fuse3/target/doc @@ -408,7 +408,7 @@ jobs: run: cargo +${{ env.RUST_DOC_TOOLCHAIN }} doc --lib --no-deps --all-features - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: unftp-sbe-opendal-docs path: ./integrations/unftp-sbe/target/doc @@ -431,7 +431,7 @@ jobs: run: cargo +${{ env.RUST_DOC_TOOLCHAIN }} doc --lib --no-deps --all-features - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: virtiofs-opendal-docs path: ./integrations/virtiofs/target/doc @@ -454,7 +454,7 @@ jobs: run: cargo +${{ env.RUST_DOC_TOOLCHAIN }} doc --lib --no-deps --all-features - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: object-parquet-docs path: ./integrations/parquet/target/doc @@ -477,7 +477,7 @@ jobs: run: cargo +${{ env.RUST_DOC_TOOLCHAIN }} doc --lib --no-deps --all-features - name: Upload docs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: opendal-compat-docs path: ./integrations/compat/target/doc @@ -524,91 +524,91 @@ jobs: run: corepack enable - name: Download rust docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: rust-docs path: ./website/static/docs/rust - name: Download nodejs docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: nodejs-docs path: ./website/static/docs/nodejs - name: Download python docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: python-docs path: ./website/static/docs/python - name: Download java docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: java-docs path: ./website/static/docs/java - name: Download C docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: C-docs path: ./website/static/docs/c - name: Download lua docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: lua-docs path: ./website/static/docs/lua - name: Download haskell docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: haskell-docs path: ./website/static/docs/haskell - name: Download cpp docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: cpp-docs path: ./website/static/docs/cpp - name: Download ocaml docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: ocaml-docs path: ./website/static/docs/ocaml - name: Download object-store-opendal docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: object-store-opendal-docs path: ./website/static/docs/object-store-opendal - name: Download opendal_compat docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: opendal-compat-docs path: ./website/static/docs/opendal_compat - name: Download dav-server-opendalfs docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: dav-server-opendalfs-docs path: ./website/static/docs/dav-server-opendalfs - name: Download fuse3-opendal docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: fuse3-opendal-docs path: ./website/static/docs/fuse3-opendal - name: Download unftp-sbe-opendal docs - uses: actions/download-artifact@v3 + uses: 
actions/download-artifact@v4 with: name: unftp-sbe-opendal-docs path: ./website/static/docs/unftp-sbe-opendal - name: Download virtiofs-opendal docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: virtiofs-opendal-docs path: ./website/static/docs/virtiofs-opendal diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index be849b8327db..2d98647fdfe2 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -20,7 +20,7 @@ name: Release Python on: push: tags: - - '*' + - "*" pull_request: branches: - main @@ -51,60 +51,48 @@ jobs: name: wheels path: bindings/python/dist - linux: - runs-on: ubuntu-latest + wheels: + runs-on: "${{ matrix.os }}" strategy: matrix: - target: [ x86_64, aarch64, armv7l ] + include: + - { os: windows-latest } + - { os: macos-latest, target: "universal2-apple-darwin" } + - { os: ubuntu-latest, target: "x86_64" } + - { os: ubuntu-latest, target: "aarch64" } + - { os: ubuntu-latest, target: "armv7l" } + env: + # Workaround ring 0.17 build issue + CFLAGS_aarch64_unknown_linux_gnu: "-D__ARM_ARCH=8" steps: - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup - uses: PyO3/maturin-action@v1 with: - target: ${{ matrix.target }} - manylinux: auto working-directory: "bindings/python" + target: "${{ matrix.target }}" command: build - args: --release -o dist --find-interpreter --features=pyo3/extension-module,services-all - env: - # Workaround ring 0.17 build issue - CFLAGS_aarch64_unknown_linux_gnu: "-D__ARM_ARCH=8" - - name: Upload wheels - uses: actions/upload-artifact@v3 - with: - name: wheels - path: bindings/python/dist - - windows: - runs-on: windows-latest - steps: - - uses: actions/checkout@v4 - - name: Setup Rust toolchain - uses: ./.github/actions/setup + args: --release -o dist -i python3.11 --features=pyo3/extension-module,services-all,abi3 + sccache: true + manylinux: auto - uses: PyO3/maturin-action@v1 with: working-directory: "bindings/python" + target: "${{ matrix.target }}" command: build - args: --release -o dist --find-interpreter --features=pyo3/extension-module,services-all - - name: Upload wheels - uses: actions/upload-artifact@v3 - with: - name: wheels - path: bindings/python/dist - - macos: - runs-on: macos-latest - steps: - - uses: actions/checkout@v4 - - name: Setup Rust toolchain - uses: ./.github/actions/setup - - uses: PyO3/maturin-action@v1 + args: --release -o dist -i python3.10 --features=pyo3/extension-module,services-all + sccache: true + manylinux: auto + - name: Build free-threaded wheels + uses: PyO3/maturin-action@v1 with: working-directory: "bindings/python" + target: "${{ matrix.target }}" command: build - target: universal2-apple-darwin - args: --release -o dist --find-interpreter --features=pyo3/extension-module,services-all + args: --release -o dist -i python3.13t --features=pyo3/extension-module,services-all + sccache: true + manylinux: auto - name: Upload wheels uses: actions/upload-artifact@v3 with: @@ -118,7 +106,7 @@ jobs: permissions: contents: read id-token: write - needs: [ macos, linux, windows ] + needs: [sdist, wheels] steps: - uses: actions/download-artifact@v3 with: diff --git a/bin/oay/Cargo.lock b/bin/oay/Cargo.lock index 50369a005100..bc28fd095386 100644 --- a/bin/oay/Cargo.lock +++ b/bin/oay/Cargo.lock @@ -324,12 +324,6 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" -[[package]] -name = "flagset" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec" - [[package]] name = "fnv" version = "1.0.7" @@ -870,7 +864,6 @@ dependencies = [ "base64 0.22.1", "bytes", "chrono", - "flagset", "futures", "getrandom", "http", diff --git a/bin/ofs/Cargo.lock b/bin/ofs/Cargo.lock index 9827165a4928..21eb4e3f5fac 100644 --- a/bin/ofs/Cargo.lock +++ b/bin/ofs/Cargo.lock @@ -1031,7 +1031,6 @@ dependencies = [ "chrono", "crc32c", "dotenvy", - "flagset", "futures", "getrandom", "http", diff --git a/bin/oli/Cargo.lock b/bin/oli/Cargo.lock index fef1627ac909..0ef096e042ec 100644 --- a/bin/oli/Cargo.lock +++ b/bin/oli/Cargo.lock @@ -742,6 +742,19 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width 0.1.14", + "windows-sys 0.52.0", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -970,6 +983,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "equivalent" version = "1.0.1" @@ -1050,12 +1069,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" -[[package]] -name = "flagset" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec" - [[package]] name = "fnv" version = "1.0.7" @@ -1518,6 +1531,19 @@ dependencies = [ "hashbrown 0.15.0", ] +[[package]] +name = "indicatif" +version = "0.17.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width 0.2.0", + "web-time", +] + [[package]] name = "inout" version = "0.1.3" @@ -1676,7 +1702,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -1924,6 +1950,12 @@ dependencies = [ "libm", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "object" version = "0.36.5" @@ -1942,6 +1974,7 @@ dependencies = [ "clap", "dirs", "futures", + "indicatif", "opendal", "serde", "tempfile", @@ -1971,7 +2004,6 @@ dependencies = [ "crc32c", "dashmap 6.1.0", "etcd-client", - "flagset", "futures", "getrandom", "hdrs", @@ -2232,6 +2264,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "portable-atomic" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" + [[package]] name = "powerfmt" version = "0.2.0" @@ -3555,6 +3593,18 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "untrusted" version = "0.9.0" @@ -3730,6 +3780,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki" version = "0.22.4" diff --git a/bin/oli/Cargo.toml b/bin/oli/Cargo.toml index 97730b5d1416..19db567782e2 100644 --- a/bin/oli/Cargo.toml +++ b/bin/oli/Cargo.toml @@ -58,6 +58,7 @@ anyhow = "1" clap = { version = "4", features = ["cargo", "string", "derive", "deprecated"] } dirs = "5.0.1" futures = "0.3" +indicatif = "0.17.9" opendal = { version = "0.50.0", path = "../../core", features = [ # These are default features before v0.46. TODO: change to optional features "services-azblob", diff --git a/bin/oli/README.md b/bin/oli/README.md index ed1a409fb0e3..0a44e5a0ab5c 100644 --- a/bin/oli/README.md +++ b/bin/oli/README.md @@ -32,7 +32,7 @@ cargo install oli --all-features - `~/Library/Application Support/oli/config.toml` on macOS - `C:\Users\\AppData\Roaming\oli\config.toml` on Windows -The content of `config.toml` should be follow these pattern: +The content of `config.toml` should follow these pattern: ```toml [profiles.] @@ -72,8 +72,8 @@ For different services, you could find the configuration keys in the correspondi ### Example: use `oli` to upload file to AWS S3 ```text -$ oli cp ./update-ecs-loadbalancer.json s3://update-ecs-loadbalancer.json -$ oli ls s3:// +$ oli cp ./update-ecs-loadbalancer.json s3:/update-ecs-loadbalancer.json +$ oli ls s3:/ fleet.png update-ecs-loadbalancer.json ``` @@ -81,8 +81,8 @@ update-ecs-loadbalancer.json ### Example: use `oli` copy file from S3 to R2 ```text -$ oli cp s3://fleet.png r2://fleet.png -$ oli ls r2:// +$ oli cp s3:/fleet.png r2:/fleet.png +$ oli ls r2:/ fleet.png ``` diff --git a/bin/oli/src/bin/oli.rs b/bin/oli/src/bin/oli.rs index 99ba3d276307..8ae115602f4a 100644 --- a/bin/oli/src/bin/oli.rs +++ b/bin/oli/src/bin/oli.rs @@ -74,6 +74,10 @@ async fn main() -> Result<()> { let cmd: oli::commands::stat::StatCmd = clap::Parser::parse(); cmd.run().await?; } + Some("omv") => { + let cmd: oli::commands::mv::MoveCmd = clap::Parser::parse(); + cmd.run().await?; + } Some(v) => { println!("{v} is not supported") } diff --git a/bin/oli/src/commands/cat.rs b/bin/oli/src/commands/cat.rs index fa892be6680e..567ebf9f1cba 100644 --- a/bin/oli/src/commands/cat.rs +++ b/bin/oli/src/commands/cat.rs @@ -30,6 +30,7 @@ use crate::params::config::ConfigParams; pub struct CatCmd { #[command(flatten)] pub config_params: ConfigParams, + /// In the form of `:/`. 
    #[arg()]
     pub target: String,
 }
diff --git a/bin/oli/src/commands/cp.rs b/bin/oli/src/commands/cp.rs
index e96f65097c3d..ed954a3df1ae 100644
--- a/bin/oli/src/commands/cp.rs
+++ b/bin/oli/src/commands/cp.rs
@@ -15,6 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use futures::AsyncBufReadExt;
+use indicatif::ProgressBar;
+use indicatif::ProgressStyle;
+use opendal::Metadata;
 use std::path::Path;
 
 use anyhow::Result;
@@ -24,13 +28,29 @@ use futures::TryStreamExt;
 use crate::config::Config;
 use crate::params::config::ConfigParams;
 
+/// Template for the progress bar display.
+///
+/// The template includes:
+/// - `{spinner:.green}`: A green spinner to indicate ongoing progress.
+/// - `{elapsed_precise}`: The precise elapsed time.
+/// - `{bar:40.cyan/blue}`: A progress bar with a width of 40 characters,
+///   cyan for the completed portion and blue for the remaining portion.
+/// - `{bytes}/{total_bytes}`: The number of bytes copied so far and the total bytes to be copied.
+/// - `{eta}`: The estimated time of arrival (completion).
+const PROGRESS_BAR_TEMPLATE: &str =
+    "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({eta})";
+
+const PROGRESS_CHARS: &str = "#>-";
+
 #[derive(Debug, clap::Parser)]
 #[command(name = "cp", about = "Copy object", disable_version_flag = true)]
 pub struct CopyCmd {
     #[command(flatten)]
     pub config_params: ConfigParams,
+    /// In the form of `<profile>:/<path>`.
     #[arg()]
     pub source: String,
+    /// In the form of `<profile>:/<path>`.
     #[arg()]
     pub destination: String,
     /// Copy objects recursively.
@@ -53,8 +73,9 @@ impl CopyCmd {
         let buf_reader = reader
             .into_futures_async_read(0..src_meta.content_length())
             .await?;
-        futures::io::copy_buf(buf_reader, &mut dst_w).await?;
-        // flush data
+
+        let copy_progress = CopyProgress::new(&src_meta, src_path.clone());
+        copy_progress.copy(buf_reader, &mut dst_w).await?;
         dst_w.close().await?;
         return Ok(());
     }
@@ -78,15 +99,59 @@ impl CopyCmd {
             .into_futures_async_read(0..meta.content_length())
             .await?;
 
+            let copy_progress = CopyProgress::new(meta, de.path().to_string());
             let mut writer = dst_op
                 .writer(&dst_root.join(fp).to_string_lossy())
                 .await?
                 .into_futures_async_write();
-            println!("Copying {}", de.path());
-            futures::io::copy_buf(buf_reader, &mut writer).await?;
+            copy_progress.copy(buf_reader, &mut writer).await?;
             writer.close().await?;
         }
 
         Ok(())
     }
 }
+
+/// Helper struct to display progress of a copy operation.
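+///
+/// The bar length comes from the source's `Metadata::content_length()`, and
+/// `copy` advances it by the size of each buffered chunk after the chunk is
+/// written, so the display tracks bytes actually copied.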
+struct CopyProgress {
+    progress_bar: ProgressBar,
+    path: String,
+}
+
+impl CopyProgress {
+    fn new(meta: &Metadata, path: String) -> Self {
+        let pb = ProgressBar::new(meta.content_length());
+        pb.set_style(
+            ProgressStyle::default_bar()
+                .template(PROGRESS_BAR_TEMPLATE)
+                .expect("invalid template")
+                .progress_chars(PROGRESS_CHARS),
+        );
+        Self {
+            progress_bar: pb,
+            path,
+        }
+    }
+
+    async fn copy<R, W>(&self, mut reader: R, writer: &mut W) -> std::io::Result<u64>
+    where
+        R: futures::AsyncBufRead + Unpin,
+        W: futures::AsyncWrite + Unpin + ?Sized,
+    {
+        let mut written = 0;
+        loop {
+            let buf = reader.fill_buf().await?;
+            if buf.is_empty() {
+                break;
+            }
+            writer.write_all(buf).await?;
+            let len = buf.len();
+            reader.consume_unpin(len);
+            written += len as u64;
+            self.progress_bar.inc(len as u64);
+        }
+        self.progress_bar.finish_and_clear();
+        println!("Finish {}", self.path);
+        Ok(written)
+    }
+}
diff --git a/bin/oli/src/commands/ls.rs b/bin/oli/src/commands/ls.rs
index 3e7eb1026889..10050982936d 100644
--- a/bin/oli/src/commands/ls.rs
+++ b/bin/oli/src/commands/ls.rs
@@ -26,6 +26,7 @@ use crate::params::config::ConfigParams;
 pub struct LsCmd {
     #[command(flatten)]
     pub config_params: ConfigParams,
+    /// In the form of `<profile>:/<path>`.
     #[arg()]
     pub target: String,
     /// List objects recursively.
diff --git a/bin/oli/src/commands/mod.rs b/bin/oli/src/commands/mod.rs
index e70d0c101a69..0ab829edb647 100644
--- a/bin/oli/src/commands/mod.rs
+++ b/bin/oli/src/commands/mod.rs
@@ -20,6 +20,7 @@
 pub mod cat;
 pub mod cp;
 pub mod ls;
+pub mod mv;
 pub mod rm;
 pub mod stat;
 
@@ -30,6 +31,7 @@ pub enum OliSubcommand {
     Ls(ls::LsCmd),
     Rm(rm::RmCmd),
     Stat(stat::StatCmd),
+    Mv(mv::MoveCmd),
 }
 
 impl OliSubcommand {
@@ -40,6 +42,7 @@ impl OliSubcommand {
             Self::Ls(cmd) => cmd.run().await,
             Self::Rm(cmd) => cmd.run().await,
             Self::Stat(cmd) => cmd.run().await,
+            Self::Mv(cmd) => cmd.run().await,
         }
     }
 }
diff --git a/bin/oli/src/commands/mv.rs b/bin/oli/src/commands/mv.rs
new file mode 100644
index 000000000000..3f5fa093a010
--- /dev/null
+++ b/bin/oli/src/commands/mv.rs
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::config::Config;
+use crate::params::config::ConfigParams;
+use anyhow::{Error, Result};
+use futures::{AsyncWriteExt, TryStreamExt};
+use opendal::Operator;
+use std::path::Path;
+
+#[derive(Debug, clap::Parser)]
+#[command(name = "mv", about = "Move object", disable_version_flag = true)]
+pub struct MoveCmd {
+    #[command(flatten)]
+    pub config_params: ConfigParams,
+    /// In the form of `<profile>:/<path>`.
+    #[arg()]
+    pub source: String,
+    /// In the form of `<profile>:/<path>`.
+    #[arg()]
+    pub destination: String,
+    /// Move objects recursively.
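+    /// Required when the source is a directory; a non-recursive move of a
+    /// directory is rejected.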
+    #[arg(short = 'r', long)]
+    pub recursive: bool,
+}
+
+impl MoveCmd {
+    pub async fn run(&self) -> Result<()> {
+        let cfg = Config::load(&self.config_params.config)?;
+
+        let (src_op, src_path) = cfg.parse_location(&self.source)?;
+        let (dst_op, dst_path) = cfg.parse_location(&self.destination)?;
+
+        let src_meta = src_op.stat(&src_path).await?;
+        if !self.recursive || src_meta.is_file() {
+            if src_meta.is_dir() {
+                return Err(Error::msg("cannot move a directory in non-recursive mode"));
+            }
+
+            let mut actual_dst_path = dst_path.clone();
+            if let Ok(meta) = dst_op.stat(&dst_path).await {
+                if meta.is_dir() && !dst_path.ends_with("/") {
+                    actual_dst_path.push('/');
+                }
+            }
+            if actual_dst_path.is_empty() || actual_dst_path.ends_with("/") {
+                let file_name = src_path.rsplit_once("/").unwrap_or(("", &src_path)).1;
+                actual_dst_path.push_str(file_name);
+            }
+
+            println!("Moving: {}", src_path);
+            self.cp_file(
+                &src_op,
+                &src_path,
+                &dst_op,
+                &actual_dst_path,
+                src_meta.content_length(),
+            )
+            .await?;
+            src_op.delete(&src_path).await?;
+
+            return Ok(());
+        }
+
+        let dst_root = Path::new(&dst_path);
+        let prefix = src_path.strip_prefix('/').unwrap_or(src_path.as_str());
+        let mut lst = src_op.lister_with(&src_path).recursive(true).await?;
+        while let Some(entry) = lst.try_next().await? {
+            let path = entry.path();
+            if path == src_path {
+                continue;
+            }
+
+            let suffix = path.strip_prefix(prefix).expect("invalid path");
+            let depath = dst_root.join(suffix);
+
+            println!("Moving: {}", path);
+            let meta = entry.metadata();
+            if meta.is_dir() {
+                dst_op.create_dir(&depath.to_string_lossy()).await?;
+                src_op.delete(path).await?;
+                continue;
+            }
+
+            let path_metadata = src_op.stat(path).await?;
+            self.cp_file(
+                &src_op,
+                path,
+                &dst_op,
+                &depath.to_string_lossy(),
+                path_metadata.content_length(),
+            )
+            .await?;
+
+            src_op.delete(path).await?;
+        }
+
+        Ok(())
+    }
+
+    async fn cp_file(
+        &self,
+        src_op: &Operator,
+        src_path: &str,
+        dst_op: &Operator,
+        dst_path: &str,
+        length: u64,
+    ) -> Result<()> {
+        let src_reader = src_op
+            .reader_with(src_path)
+            .chunk(8 * 1024 * 1024)
+            .await?
+            .into_futures_async_read(0..length)
+            .await?;
+
+        let mut dst_writer = dst_op.writer(dst_path).await?.into_futures_async_write();
+
+        futures::io::copy_buf(src_reader, &mut dst_writer).await?;
+        dst_writer.close().await?;
+
+        Ok(())
+    }
+}
diff --git a/bin/oli/src/commands/rm.rs b/bin/oli/src/commands/rm.rs
index 04bc258d48c9..5c38bf1a001e 100644
--- a/bin/oli/src/commands/rm.rs
+++ b/bin/oli/src/commands/rm.rs
@@ -25,6 +25,7 @@ use crate::params::config::ConfigParams;
 pub struct RmCmd {
     #[command(flatten)]
     pub config_params: ConfigParams,
+    /// In the form of `<profile>:/<path>`.
     #[arg()]
     pub target: String,
     /// Remove objects recursively.
diff --git a/bin/oli/src/commands/stat.rs b/bin/oli/src/commands/stat.rs
index 8f1051f1a1c7..04cc2431bfca 100644
--- a/bin/oli/src/commands/stat.rs
+++ b/bin/oli/src/commands/stat.rs
@@ -29,6 +29,7 @@ use crate::params::config::ConfigParams;
 pub struct StatCmd {
     #[command(flatten)]
     pub config_params: ConfigParams,
+    /// In the form of `<profile>:/<path>`.
     #[arg()]
     pub target: String,
 }
diff --git a/bin/oli/src/config/mod.rs b/bin/oli/src/config/mod.rs
index d15ea884a498..3ed44e0246cc 100644
--- a/bin/oli/src/config/mod.rs
+++ b/bin/oli/src/config/mod.rs
@@ -37,7 +37,7 @@ pub struct Config {
     profiles: HashMap<String, HashMap<String, String>>,
 }
 
-/// resolve_relative_path turns a relative path to a absolute path.
+/// resolve_relative_path turns a relative path to an absolute path.
/// /// The reason why we don't use `fs::canonicalize` here is `fs::canonicalize` /// will return an error if the path does not exist, which is unwanted. @@ -147,7 +147,7 @@ impl Config { let location = Url::parse(s)?; if location.has_host() { - Err(anyhow!("Host part in a location is not supported."))?; + Err(anyhow!("Host part in a location is not supported. Hint: are you typing `://` instead of `:/`?"))?; } let profile_name = location.scheme(); @@ -338,7 +338,7 @@ enable_virtual_host_style = "on" let uri = "mys3://foo/1.txt"; let expected_msg = "Host part in a location is not supported."; match cfg.parse_location(uri) { - Err(e) if e.to_string() == expected_msg => Ok(()), + Err(e) if e.to_string().contains(expected_msg) => Ok(()), _ => Err(anyhow!( "Getting an message \"{}\" is expected when parsing {}.", expected_msg, diff --git a/bin/oli/tests/mv.rs b/bin/oli/tests/mv.rs new file mode 100644 index 000000000000..926a937a684e --- /dev/null +++ b/bin/oli/tests/mv.rs @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
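+//
+// These tests drive the compiled `oli` binary end to end on the local
+// filesystem: a plain file move, a move into an existing directory, and a
+// recursive move that must also recreate empty directories.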
+ +use anyhow::Result; +use assert_cmd::Command; +use std::fs; + +#[tokio::test] +async fn test_basic_mv() -> Result<()> { + let dir = tempfile::tempdir()?; + let src_path = dir.path().join("src.txt"); + let dst_path = dir.path().join("dst.txt"); + let expect = "hello"; + fs::write(&src_path, expect)?; + + let mut cmd = Command::cargo_bin("oli")?; + cmd.arg("mv") + .arg(src_path.as_os_str()) + .arg(dst_path.as_os_str()); + cmd.assert().success(); + + let actual = fs::read_to_string(&dst_path)?; + assert_eq!(actual, expect); + + assert!(!fs::exists(&src_path)?); + + Ok(()) +} + +#[tokio::test] +async fn test_move_a_file_to_a_dir() -> Result<()> { + let src_dir = tempfile::tempdir()?; + let src_path = src_dir.path().join("src.txt"); + let expect = "hello"; + fs::write(&src_path, expect)?; + + let dst_dir = tempfile::tempdir()?; + let dst_path = dst_dir.path().join("dir/"); + + let mut cmd = Command::cargo_bin("oli")?; + cmd.arg("mv") + .arg(src_path.as_os_str()) + .arg(dst_path.as_os_str()); + cmd.assert().success(); + + let dst_path = dst_path.join("src.txt"); + let actual = fs::read_to_string(&dst_path)?; + assert_eq!(actual, expect); + + assert!(!fs::exists(&src_path)?); + + Ok(()) +} + +#[tokio::test] +async fn test_mv_with_recursive() -> Result<()> { + let src_root = tempfile::tempdir()?; + let src_path = src_root.path().join("src/"); + fs::create_dir(&src_path)?; + + let src_file1 = src_path.as_path().join("file1.txt"); + let file1_content = "file1"; + fs::write(&src_file1, file1_content).expect("write file1 error"); + + let src_dir = src_path.join("dir/"); + fs::create_dir(&src_dir)?; + let src_file2 = src_dir.as_path().join("file2.txt"); + let file2_content = "file2"; + fs::write(&src_file2, file2_content).expect("write file2 error"); + + let src_empty_dir = src_path.join("empty_dir/"); + fs::create_dir(&src_empty_dir)?; + + let dst_path = tempfile::tempdir()?; + + let mut cmd = Command::cargo_bin("oli")?; + cmd.arg("mv") + .arg(src_path.as_os_str()) + .arg(dst_path.path().as_os_str()) + .arg("-r"); + cmd.assert().success(); + + let dst_file1_content = + fs::read_to_string(dst_path.path().join("file1.txt")).expect("read file1 error"); + assert_eq!(dst_file1_content, file1_content); + let dst_file2_content = + fs::read_to_string(dst_path.path().join("dir/file2.txt")).expect("read dir/file2 error"); + assert_eq!(dst_file2_content, file2_content); + assert!(fs::exists(dst_path.path().join("empty_dir/"))?); + + // src_path is empty now + let mut src_data = fs::read_dir(&src_path)?; + assert!(src_data.next().is_none()); + + Ok(()) +} diff --git a/bindings/c/include/opendal.h b/bindings/c/include/opendal.h index 76f84f93341e..3560737dda2d 100644 --- a/bindings/c/include/opendal.h +++ b/bindings/c/include/opendal.h @@ -585,6 +585,10 @@ typedef struct opendal_capability { * If it is not set, this will be zero */ uintptr_t batch_max_operations; + /** + * If operator supports shared. + */ + bool shared; /** * If operator supports blocking. */ diff --git a/bindings/c/src/operator_info.rs b/bindings/c/src/operator_info.rs index db96e99fedf3..e08becbc65f3 100644 --- a/bindings/c/src/operator_info.rs +++ b/bindings/c/src/operator_info.rs @@ -135,6 +135,9 @@ pub struct opendal_capability { /// If it is not set, this will be zero pub batch_max_operations: usize, + /// If operator supports shared. + pub shared: bool, + /// If operator supports blocking. 
pub blocking: bool, } @@ -263,6 +266,7 @@ impl From for opendal_capability { batch: value.batch, batch_delete: value.batch_delete, batch_max_operations: value.batch_max_operations.unwrap_or(0), + shared: value.shared, blocking: value.blocking, } } diff --git a/bindings/cpp/CMakeLists.txt b/bindings/cpp/CMakeLists.txt index 0d8ce89dfed6..2762a14303ac 100644 --- a/bindings/cpp/CMakeLists.txt +++ b/bindings/cpp/CMakeLists.txt @@ -16,14 +16,19 @@ # under the License. cmake_minimum_required(VERSION 3.22) + +# CMP0135: set the timestamps of all extracted contents +# to the time of the extraction in FetchContent +if (POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() + project(opendal-cpp LANGUAGES CXX) include(FetchContent) set(OPENDAL_GOOGLETEST_VERSION 1.15.2 CACHE STRING "version of GoogleTest, 'external' to fallback to find_package()") set(OPENDAL_BOOST_VERSION 1.86.0 CACHE STRING "version of Boost, 'external' to fallback to find_package()") - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(OPENDAL_CPPCORO_VERSION a4ef65281814b18fdd1ac5457d3e219347ec6cb8 CACHE STRING "version of cppcoro") if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Debug) @@ -34,6 +39,18 @@ option(OPENDAL_ENABLE_DOCUMENTATION "Enable generating document for opendal" OFF option(OPENDAL_DOCS_ONLY "Only build documentation (dev only for quick ci)" OFF) option(OPENDAL_ENABLE_TESTING "Enable building test binary for opendal" OFF) option(OPENDAL_DEV "Enable dev mode" OFF) +option(OPENDAL_ENABLE_ASYNC "Enable async mode (requires C++20)" OFF) + +if(OPENDAL_ENABLE_ASYNC) + set(CMAKE_CXX_STANDARD 20) + + if (NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"))) + message(FATAL_ERROR "currently C++ compiler must be clang for async mode") + endif() +else() + set(CMAKE_CXX_STANDARD 17) +endif() +set(CMAKE_CXX_STANDARD_REQUIRED ON) if (OPENDAL_DEV) set(OPENDAL_ENABLE_ADDRESS_SANITIZER ON) @@ -69,42 +86,48 @@ execute_process(COMMAND cargo locate-project --workspace --message-format plain string(REGEX REPLACE "/Cargo.toml\n$" "/target" CARGO_TARGET_DIR "${CARGO_TARGET_DIR}") set(CARGO_MANIFEST ${PROJECT_SOURCE_DIR}/Cargo.toml) set(RUST_SOURCE_FILE ${PROJECT_SOURCE_DIR}/src/lib.rs) -set(RUST_BRIDGE_CPP ${CARGO_TARGET_DIR}/cxxbridge/opendal-cpp/src/lib.rs.cc) -set(RUST_HEADER_FILE ${CARGO_TARGET_DIR}/cxxbridge/opendal-cpp/src/lib.rs.h) +list(APPEND RUST_BRIDGE_CPP ${CARGO_TARGET_DIR}/cxxbridge/opendal-cpp/src/lib.rs.cc) +list(APPEND RUST_HEADER_FILE ${CARGO_TARGET_DIR}/cxxbridge/opendal-cpp/src/lib.rs.h) +if (OPENDAL_ENABLE_ASYNC) + list(APPEND RUST_BRIDGE_CPP ${CARGO_TARGET_DIR}/cxxbridge/opendal-cpp/src/async.rs.cc) + list(APPEND RUST_HEADER_FILE ${CARGO_TARGET_DIR}/cxxbridge/opendal-cpp/src/async.rs.h) +endif() if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(RUST_LIB ${CARGO_TARGET_DIR}/debug/${CMAKE_STATIC_LIBRARY_PREFIX}opendal_cpp${CMAKE_STATIC_LIBRARY_SUFFIX}) else() set(RUST_LIB ${CARGO_TARGET_DIR}/release/${CMAKE_STATIC_LIBRARY_PREFIX}opendal_cpp${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() set(CPP_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include ${CARGO_TARGET_DIR}/cxxbridge/opendal-cpp/src) -file(GLOB_RECURSE CPP_SOURCE_FILE src/*.cpp) -file(GLOB_RECURSE CPP_HEADER_FILE include/*.hpp) +list(APPEND CPP_SOURCE_FILE src/opendal.cpp) +list(APPEND CPP_HEADER_FILE include/opendal.hpp) +if (OPENDAL_ENABLE_ASYNC) + list(APPEND CPP_SOURCE_FILE src/opendal_async.cpp) + list(APPEND CPP_HEADER_FILE include/opendal_async.hpp) +endif() -if (CMAKE_BUILD_TYPE STREQUAL "Debug") - 
add_custom_command( - OUTPUT ${RUST_BRIDGE_CPP} ${RUST_LIB} ${RUST_HEADER_FILE} - COMMAND cargo build --manifest-path ${CARGO_MANIFEST} - DEPENDS ${RUST_SOURCE_FILE} - USES_TERMINAL - COMMENT "Running cargo..." - ) -else() - add_custom_command( - OUTPUT ${RUST_BRIDGE_CPP} ${RUST_LIB} ${RUST_HEADER_FILE} - COMMAND cargo build --manifest-path ${CARGO_MANIFEST} --release - DEPENDS ${RUST_SOURCE_FILE} - USES_TERMINAL - COMMENT "Running cargo..." - ) +if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") + list(APPEND CARGO_BUILD_FLAGS "--release") endif() +if (OPENDAL_ENABLE_ASYNC) + list(APPEND CARGO_BUILD_FLAGS "--features" "async") +endif() + +add_custom_target(cargo_build + COMMAND cargo build --manifest-path ${CARGO_MANIFEST} ${CARGO_BUILD_FLAGS} + BYPRODUCTS ${RUST_BRIDGE_CPP} ${RUST_LIB} ${RUST_HEADER_FILE} + DEPENDS ${RUST_SOURCE_FILE} + USES_TERMINAL + COMMENT "Running cargo..." +) + if(OPENDAL_BOOST_VERSION STREQUAL "external") find_package(Boost REQUIRED COMPONENTS date_time iostreams) else() # fetch Boost FetchContent_Declare( - Boost - URL https://github.com/boostorg/boost/releases/download/boost-${OPENDAL_BOOST_VERSION}/boost-${OPENDAL_BOOST_VERSION}-cmake.zip + Boost + URL https://github.com/boostorg/boost/releases/download/boost-${OPENDAL_BOOST_VERSION}/boost-${OPENDAL_BOOST_VERSION}-cmake.zip ) set(BOOST_INCLUDE_LIBRARIES date_time iostreams system) @@ -115,12 +138,17 @@ endif() add_library(opendal_cpp STATIC ${CPP_SOURCE_FILE} ${RUST_BRIDGE_CPP}) target_sources(opendal_cpp PUBLIC ${CPP_HEADER_FILE}) target_sources(opendal_cpp PRIVATE ${RUST_HEADER_FILE}) -target_include_directories(opendal_cpp PUBLIC ${CPP_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) -target_link_libraries(opendal_cpp PUBLIC ${RUST_LIB}) -target_link_libraries(opendal_cpp PRIVATE ${CMAKE_DL_LIBS} Boost::date_time) +target_include_directories(opendal_cpp PUBLIC ${CPP_INCLUDE_DIR}) +if (OPENDAL_ENABLE_ASYNC) + target_include_directories(opendal_cpp PUBLIC ${CARGO_TARGET_DIR}/cxxbridge) + target_compile_options(opendal_cpp PUBLIC -include ${PROJECT_SOURCE_DIR}/include/async_defs.hpp) +endif() +target_link_libraries(opendal_cpp PUBLIC ${RUST_LIB} Boost::date_time Boost::iostreams) +target_link_libraries(opendal_cpp PRIVATE ${CMAKE_DL_LIBS}) set_target_properties(opendal_cpp PROPERTIES ADDITIONAL_CLEAN_FILES ${CARGO_TARGET_DIR} ) +add_dependencies(opendal_cpp cargo_build) if (OPENDAL_ENABLE_ADDRESS_SANITIZER) target_compile_options(opendal_cpp PRIVATE -fsanitize=leak,address,undefined -fno-omit-frame-pointer -fno-common -O1) @@ -156,11 +184,25 @@ if (OPENDAL_ENABLE_TESTING) FetchContent_MakeAvailable(googletest) endif() - file(GLOB_RECURSE TEST_SOURCE_FILE tests/*.cpp) + if (OPENDAL_ENABLE_ASYNC) + FetchContent_Declare( + cppcoro + URL https://github.com/andreasbuhr/cppcoro/archive/${OPENDAL_CPPCORO_VERSION}.zip + ) + FetchContent_MakeAvailable(cppcoro) + endif() + + list(APPEND TEST_SOURCE_FILE tests/basic_test.cpp) + if (OPENDAL_ENABLE_ASYNC) + list(APPEND TEST_SOURCE_FILE tests/async_test.cpp) + endif() add_executable(opendal_cpp_test ${TEST_SOURCE_FILE}) target_include_directories(opendal_cpp_test PUBLIC ${CPP_INCLUDE_DIR} ${GTEST_INCLUDE_DIRS}) target_link_libraries(opendal_cpp_test ${GTEST_LDFLAGS} GTest::gtest_main opendal_cpp) target_compile_options(opendal_cpp_test PRIVATE ${GTEST_CFLAGS}) + if (OPENDAL_ENABLE_ASYNC) + target_link_libraries(opendal_cpp_test cppcoro) + endif() # enable address sanitizers if (OPENDAL_ENABLE_ADDRESS_SANITIZER) diff --git a/bindings/cpp/Cargo.toml b/bindings/cpp/Cargo.toml index 
4f711b7e9053..2d82dc8c260b 100644
--- a/bindings/cpp/Cargo.toml
+++ b/bindings/cpp/Cargo.toml
@@ -34,6 +34,7 @@ crate-type = ["staticlib"]
 anyhow = "1.0"
 chrono = "0.4"
 cxx = "1.0"
+cxx-async = { version = "0.1.2", optional = true }
 # this crate won't be published, we always use the local version
 opendal = { version = ">=0", path = "../../core", features = [
   # These are default features before v0.46. TODO: change to optional features
@@ -56,3 +57,6 @@ opendal = { version = ">=0", path = "../../core", features = [
 
 [build-dependencies]
 cxx-build = "1.0"
+
+[features]
+async = ["cxx-async", "cxx/c++20"]
diff --git a/bindings/cpp/build.rs b/bindings/cpp/build.rs
index 7d5d10f7b1b2..168102b0a221 100644
--- a/bindings/cpp/build.rs
+++ b/bindings/cpp/build.rs
@@ -15,8 +15,46 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#[cfg(feature = "async")]
+mod build_async {
+    use std::{
+        env::var,
+        io,
+        path::{Path, PathBuf},
+    };
+
+    fn copy_force<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) -> io::Result<()> {
+        if dst.as_ref().exists() {
+            std::fs::remove_file(&dst)?;
+        }
+
+        std::fs::copy(src, dst)?;
+        Ok(())
+    }
+
+    pub fn symlink_async_includes() {
+        let async_inc = var("DEP_CXX_ASYNC_INCLUDE").unwrap();
+        let src_dir = PathBuf::from(async_inc).join("rust");
+
+        let prj_dir = var("CARGO_MANIFEST_DIR").unwrap();
+        let dst_dir = PathBuf::from(prj_dir)
+            .join("target")
+            .join("cxxbridge")
+            .join("rust");
+
+        copy_force(src_dir.join("cxx_async.h"), dst_dir.join("cxx_async.h")).unwrap();
+    }
+}
+
 fn main() {
     let _ = cxx_build::bridge("src/lib.rs");
+    #[cfg(feature = "async")]
+    {
+        let _ = cxx_build::bridge("src/async.rs");
+        build_async::symlink_async_includes();
+    }
     println!("cargo:rerun-if-changed=src/lib.rs");
+    #[cfg(feature = "async")]
+    println!("cargo:rerun-if-changed=src/async.rs");
 }
diff --git a/bindings/cpp/include/async_defs.hpp b/bindings/cpp/include/async_defs.hpp
new file mode 100644
index 000000000000..c8a831d0ccc1
--- /dev/null
+++ b/bindings/cpp/include/async_defs.hpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include "rust/cxx.h"
+#include "rust/cxx_async.h"
+
+CXXASYNC_DEFINE_FUTURE(rust::Vec<uint8_t>, opendal, ffi, async, RustFutureRead);
+CXXASYNC_DEFINE_FUTURE(void, opendal, ffi, async, RustFutureWrite);
diff --git a/bindings/cpp/include/opendal_async.hpp b/bindings/cpp/include/opendal_async.hpp
new file mode 100644
index 000000000000..45524df5bb06
--- /dev/null
+++ b/bindings/cpp/include/opendal_async.hpp
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <span>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+#include "async.rs.h"
+#include "async_defs.hpp"
+
+namespace opendal::async {
+
+class Operator {
+ public:
+  Operator(std::string_view scheme,
+           const std::unordered_map<std::string, std::string> &config = {});
+
+  // Disable copy and assign
+  Operator(const Operator &) = delete;
+  Operator &operator=(const Operator &) = delete;
+
+  // Enable move
+  Operator(Operator &&) = default;
+  Operator &operator=(Operator &&) = default;
+  ~Operator() = default;
+
+  using ReadFuture = opendal::ffi::async::RustFutureRead;
+  ReadFuture read(std::string_view path);
+
+  using WriteFuture = opendal::ffi::async::RustFutureWrite;
+  WriteFuture write(std::string_view path, std::span<uint8_t> data);
+
+ private:
+  rust::Box<opendal::ffi::async::Operator> operator_;
+};
+
+}  // namespace opendal::async
diff --git a/bindings/cpp/src/async.rs b/bindings/cpp/src/async.rs
new file mode 100644
index 000000000000..595006f269e0
--- /dev/null
+++ b/bindings/cpp/src/async.rs
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use anyhow::Result;
+use cxx_async::CxxAsyncException;
+use opendal as od;
+use std::collections::HashMap;
+use std::future::Future;
+use std::ops::Deref;
+use std::str::FromStr;
+
+#[cxx::bridge(namespace = "opendal::ffi::async")]
+mod ffi {
+    struct HashMapValue {
+        key: String,
+        value: String,
+    }
+
+    // here we have to use raw pointers since:
+    // 1. cxx-async futures require a 'static lifetime (and it's hard to change for now)
+    // 2. cxx SharedPtr cannot accept Rust types as type parameters for now
cxx SharedPtr cannot accept Rust types as type parameters for now + pub struct OperatorPtr { + op: *const Operator, + } + + extern "Rust" { + type Operator; + + fn new_operator(scheme: &str, configs: Vec) -> Result>; + unsafe fn operator_read(op: OperatorPtr, path: String) -> RustFutureRead; + unsafe fn operator_write(op: OperatorPtr, path: String, bs: Vec) -> RustFutureWrite; + } + + extern "C++" { + type RustFutureRead = super::RustFutureRead; + type RustFutureWrite = super::RustFutureWrite; + } +} + +#[cxx_async::bridge(namespace = opendal::ffi::async)] +unsafe impl Future for RustFutureRead { + type Output = Vec; +} + +#[cxx_async::bridge(namespace = opendal::ffi::async)] +unsafe impl Future for RustFutureWrite { + type Output = (); +} + +pub struct Operator(od::Operator); + +fn new_operator(scheme: &str, configs: Vec) -> Result> { + let scheme = od::Scheme::from_str(scheme)?; + + let map: HashMap = configs + .into_iter() + .map(|value| (value.key, value.value)) + .collect(); + + let op = Box::new(Operator(od::Operator::via_iter(scheme, map)?)); + + Ok(op) +} + +impl Deref for ffi::OperatorPtr { + type Target = Operator; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.op } + } +} + +unsafe impl Send for ffi::OperatorPtr {} + +unsafe fn operator_read(op: ffi::OperatorPtr, path: String) -> RustFutureRead { + RustFutureRead::fallible(async move { + Ok((*op) + .0 + .read(&path) + .await + .map_err(|e| CxxAsyncException::new(e.to_string().into_boxed_str()))? + .to_vec()) + }) +} + +unsafe fn operator_write(op: ffi::OperatorPtr, path: String, bs: Vec) -> RustFutureWrite { + RustFutureWrite::fallible(async move { + Ok((*op) + .0 + .write(&path, bs) + .await + .map_err(|e| CxxAsyncException::new(e.to_string().into_boxed_str()))?) + }) +} diff --git a/bindings/cpp/src/lib.rs b/bindings/cpp/src/lib.rs index a37807c078e6..957134501cc7 100644 --- a/bindings/cpp/src/lib.rs +++ b/bindings/cpp/src/lib.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#[cfg(feature = "async")] +mod r#async; mod lister; mod reader; mod types; diff --git a/bindings/cpp/src/opendal_async.cpp b/bindings/cpp/src/opendal_async.cpp new file mode 100644 index 000000000000..6ec2dccad9f2 --- /dev/null +++ b/bindings/cpp/src/opendal_async.cpp @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
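The `OperatorPtr` indirection above exists because cxx-async futures must be `'static`: they cannot borrow the C++-owned operator, so a raw pointer is copied into the future and re-borrowed inside the async block, with `unsafe impl Send` asserting the cross-thread part of the contract. A standalone sketch of the same pattern under the same safety assumption (the pointee outlives every in-flight future; all names here are illustrative):

```rust
use std::future::Future;

struct Payload(String);

// Copyable raw-pointer wrapper, playing the role of ffi::OperatorPtr.
#[derive(Clone, Copy)]
struct PayloadPtr(*const Payload);

// Safety: callers promise the pointee stays alive (and is not mutated)
// while any future built from this pointer is pending.
unsafe impl Send for PayloadPtr {}

fn payload_len(ptr: PayloadPtr) -> impl Future<Output = usize> + 'static {
    async move {
        // Re-borrow inside the 'static future instead of capturing a lifetime.
        let payload = unsafe { &*ptr.0 };
        payload.0.len()
    }
}

fn main() {
    let payload = Payload("hello".to_string());
    let fut = payload_len(PayloadPtr(&payload));
    // `payload` is still alive here, so polling the future is sound.
    assert_eq!(futures::executor::block_on(fut), 5);
}
```

In the binding, that lifetime guarantee holds because the C++ `opendal::async::Operator` owns the `rust::Box` and outlives the futures it hands out.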
diff --git a/bindings/cpp/src/opendal_async.cpp b/bindings/cpp/src/opendal_async.cpp
new file mode 100644
index 000000000000..6ec2dccad9f2
--- /dev/null
+++ b/bindings/cpp/src/opendal_async.cpp
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "opendal_async.hpp"
+
+#include <algorithm>
+
+#include "async.rs.h"
+#include "async_defs.hpp"
+
+#define RUST_STR(s) rust::Str(s.data(), s.size())
+#define RUST_STRING(s) rust::String(s.data(), s.size())
+
+using namespace opendal::async;
+
+static rust::Box<opendal::ffi::async::Operator> new_operator(
+    std::string_view scheme,
+    const std::unordered_map<std::string, std::string> &config) {
+  auto rust_map = rust::Vec<opendal::ffi::async::HashMapValue>();
+  rust_map.reserve(config.size());
+  for (auto &[k, v] : config) {
+    rust_map.push_back({RUST_STRING(k), RUST_STRING(v)});
+  }
+
+  return opendal::ffi::async::new_operator(RUST_STR(scheme), rust_map);
+}
+
+Operator::Operator(std::string_view scheme,
+                   const std::unordered_map<std::string, std::string> &config)
+    : operator_(new_operator(scheme, config)) {}
+
+Operator::ReadFuture Operator::read(std::string_view path) {
+  return opendal::ffi::async::operator_read(
+      opendal::ffi::async::OperatorPtr{&*operator_}, RUST_STRING(path));
+}
+
+Operator::WriteFuture Operator::write(std::string_view path,
+                                      std::span<uint8_t> data) {
+  rust::Vec<uint8_t> vec;
+  std::copy(data.begin(), data.end(), std::back_inserter(vec));
+
+  return opendal::ffi::async::operator_write(
+      opendal::ffi::async::OperatorPtr{&*operator_}, RUST_STRING(path), vec);
+}
diff --git a/bindings/cpp/tests/async_test.cpp b/bindings/cpp/tests/async_test.cpp
new file mode 100644
index 000000000000..dc25c20685f3
--- /dev/null
+++ b/bindings/cpp/tests/async_test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <optional>
+#include <random>
+
+#include "cppcoro/sync_wait.hpp"
+#include "cppcoro/task.hpp"
+#include "gtest/gtest.h"
+#include "opendal_async.hpp"
+
+class AsyncOpendalTest : public ::testing::Test {
+ protected:
+  std::optional<opendal::async::Operator> op;
+
+  std::string scheme;
+  std::unordered_map<std::string, std::string> config;
+
+  // random number generator
+  std::mt19937 rng;
+
+  void SetUp() override {
+    scheme = "memory";
+    rng.seed(time(nullptr));
+
+    op = opendal::async::Operator(scheme, config);
+  }
+};
+
+TEST_F(AsyncOpendalTest, BasicTest) {
+  auto path = "test_path";
+  std::vector<uint8_t> data{1, 2, 3, 4, 5};
+  cppcoro::sync_wait(op->write(path, data));
+  auto res = cppcoro::sync_wait(op->read(path));
+  for (size_t i = 0; i < data.size(); ++i) EXPECT_EQ(data[i], res[i]);
+
+  path = "test_path2";
+  cppcoro::sync_wait([&]() -> cppcoro::task<void> {
+    co_await op->write(path, data);
+    auto res = co_await op->read(path);
+    for (size_t i = 0; i < data.size(); ++i) EXPECT_EQ(data[i], res[i]);
+    co_return;
+  }());
+}
diff --git a/bindings/go/operator_info.go b/bindings/go/operator_info.go
index d9264b6c869e..70f5c1e5532e 100644
--- a/bindings/go/operator_info.go
+++ b/bindings/go/operator_info.go
@@ -240,6 +240,10 @@ func (c *Capability) BatchMaxOperations() uint {
 	return c.inner.batchMaxOperations
 }
 
+func (c *Capability) Shared() bool {
+	return c.inner.shared == 1
+}
+
 func (c *Capability) Blocking() bool {
 	return c.inner.blocking == 1
 }
diff --git a/bindings/go/tests/behavior_tests/go.mod b/bindings/go/tests/behavior_tests/go.mod
index dae1477ff169..fcaba00ec69c 100644
--- a/bindings/go/tests/behavior_tests/go.mod
+++ b/bindings/go/tests/behavior_tests/go.mod
@@ -21,7 +21,6 @@ go 1.22.5
 
 require (
 	github.com/apache/opendal-go-services/fs v0.1.3
-	github.com/apache/opendal-go-services/memory v0.1.3
 	github.com/apache/opendal/bindings/go v0.0.0-20240719044908-d9d4279b3a24
 	github.com/google/uuid v1.6.0
 	github.com/stretchr/testify v1.9.0
diff --git a/bindings/go/tests/behavior_tests/opendal_test.go b/bindings/go/tests/behavior_tests/opendal_test.go
index 8991f971a3ec..c421885b4d3d 100644
--- a/bindings/go/tests/behavior_tests/opendal_test.go
+++ b/bindings/go/tests/behavior_tests/opendal_test.go
@@ -31,7 +31,6 @@ import (
 	"testing"
 
 	"github.com/apache/opendal-go-services/fs"
-	"github.com/apache/opendal-go-services/memory"
 	opendal "github.com/apache/opendal/bindings/go"
 	"github.com/google/uuid"
 	"github.com/stretchr/testify/require"
@@ -39,7 +38,6 @@ import (
 
 // Add more schemes for behavior tests here.
var schemes = []opendal.Scheme{ - memory.Scheme, fs.Scheme, } diff --git a/bindings/go/types.go b/bindings/go/types.go index d5bd92495d29..79a4f3fa84e4 100644 --- a/bindings/go/types.go +++ b/bindings/go/types.go @@ -163,6 +163,7 @@ var ( &ffi.TypeUint8, // batch &ffi.TypeUint8, // batch_delete &ffi.TypePointer, // batch_max_operations + &ffi.TypeUint8, // shared &ffi.TypeUint8, // blocking nil, }[0], @@ -204,6 +205,7 @@ type opendalCapability struct { batch uint8 batchDelete uint8 batchMaxOperations uint + shared uint8 blocking uint8 } diff --git a/bindings/java/src/lib.rs b/bindings/java/src/lib.rs index a9fef7e6506f..d19da1fb2fb6 100644 --- a/bindings/java/src/lib.rs +++ b/bindings/java/src/lib.rs @@ -94,7 +94,7 @@ fn make_operator_info<'a>(env: &mut JNIEnv<'a>, info: OperatorInfo) -> Result(env: &mut JNIEnv<'a>, cap: Capability) -> Result> { let capability = env.new_object( "org/apache/opendal/Capability", - "(ZZZZZZZZZZZZZZZJJZZZZZZZZZZZZZZJZ)V", + "(ZZZZZZZZZZZZZZZJJZZZZZZZZZZZZZZJZZ)V", &[ JValue::Bool(cap.stat as jboolean), JValue::Bool(cap.stat_with_if_match as jboolean), @@ -128,6 +128,7 @@ fn make_capability<'a>(env: &mut JNIEnv<'a>, cap: Capability) -> Result bool { + self.0.shared + } + /// If operator supports blocking. #[napi(getter)] pub fn blocking(&self) -> bool { diff --git a/bindings/ocaml/lib/operator.mli b/bindings/ocaml/lib/operator.mli index a7a31a6c4f4a..7b0f0e0bac6d 100644 --- a/bindings/ocaml/lib/operator.mli +++ b/bindings/ocaml/lib/operator.mli @@ -21,12 +21,14 @@ val new_operator : string -> (string * string) list -> (Opendal_core.Operator.operator, string) result -(** [new_operator scheme config_map] Create a new block operator from given scheme and config_map. +(** [new_operator scheme config_map] Create a new block operator from given + scheme and config_map. - @param scheme Supported services, for details, refer to https://opendal.apache.org/docs/category/services/ + @param scheme + Supported services, for details, refer to + https://opendal.apache.org/docs/category/services/ @param config_map Configuration information required by the target service - @return The block operator -*) + @return The block operator *) val list : Opendal_core.Operator.operator -> @@ -37,20 +39,19 @@ val stat : Opendal_core.Operator.operator -> string -> (Opendal_core.Operator.metadata, string) result -(** [is_exist operator path] Get current path's metadata **without cache** directly. +(** [is_exist operator path] Get current path's metadata **without cache** + directly. @param operator The operator @param path want to stat - @return metadata -*) + @return metadata *) val is_exist : Opendal_core.Operator.operator -> string -> (bool, string) result (** [is_exist operator path] Check if this path exists or not. @param operator The operator @param path want to check - @return is exists -*) + @return is exists *) val create_dir : Opendal_core.Operator.operator -> string -> (bool, string) result @@ -58,17 +59,17 @@ val create_dir : # Notes - To indicate that a path is a directory, it is compulsory to include - a trailing / in the path. Failure to do so may result in - `NotADirectory` error being returned by OpenDAL. + To indicate that a path is a directory, it is compulsory to include a + trailing / in the path. Failure to do so may result in `NotADirectory` error + being returned by OpenDAL. # Behavior - Create on existing dir will succeed. 
- Create dir is always recursive, works like `mkdir -p` + @param operator The operator - @param path want to create dir -*) + @param path want to create dir *) val read : Opendal_core.Operator.operator -> string -> (char array, string) result @@ -76,8 +77,7 @@ val read : @param operator The operator @param path want to read - @return data of path -*) + @return data of path *) val reader : Opendal_core.Operator.operator -> @@ -87,17 +87,17 @@ val reader : @param operator The operator @param path want to read - @return reader -*) + @return reader *) val write : Opendal_core.Operator.operator -> string -> bytes -> (unit, string) result (** [write operator path data] Write bytes into given path. - - Write will make sure all bytes has been written, or an error will be returned. + - Write will make sure all bytes has been written, or an error will be + returned. + @param operator The operator @param path want to write - @param data want to write -*) + @param data want to write *) val copy : Opendal_core.Operator.operator -> string -> string -> (unit, string) result @@ -105,11 +105,12 @@ val copy : - [from] and [to] must be a file. - [to] will be overwritten if it exists. - If [from] and [to] are the same, nothing will happen. - - copy is idempotent. For same [from] and [to] input, the result will be the same. + - copy is idempotent. For same [from] and [to] input, the result will be the + same. + @param operator The operator @param from file path - @param to file path -*) + @param to file path *) val rename : Opendal_core.Operator.operator -> string -> string -> (unit, string) result @@ -117,33 +118,34 @@ val rename : - [from] and [to] must be a file. - [to] will be overwritten if it exists. - If [from] and [to] are the same, a `IsSameFile` error will occur. + @param operator The operator @param from file path - @param to file path -*) + @param to file path *) val delete : Opendal_core.Operator.operator -> string -> (unit, string) result (** [delete operator path] Delete given path. - Delete not existing error won't return errors. + @param operator The block operator - @param path file path -*) + @param path file path *) val remove : Opendal_core.Operator.operator -> string array -> (unit, string) result (** [remove operator paths] Remove path array. - We don't support batch delete now, will call delete on each object in turn + @param operator The block operator - @param paths file path array -*) + @param paths file path array *) val remove_all : Opendal_core.Operator.operator -> string -> (unit, string) result -(** [remove_all operator path] Remove the path and all nested dirs and files recursively. +(** [remove_all operator path] Remove the path and all nested dirs and files + recursively. 
- We don't support batch delete now, will call delete on each object in turn + @param operator The block operator - @param path file path -*) + @param path file path *) module Reader : sig val pread : diff --git a/bindings/ocaml/src/operator/mod.rs b/bindings/ocaml/src/operator/mod.rs index e6cddc3e27bc..a2dc06a7fd2f 100644 --- a/bindings/ocaml/src/operator/mod.rs +++ b/bindings/ocaml/src/operator/mod.rs @@ -60,7 +60,7 @@ pub fn blocking_stat( #[ocaml::func] #[ocaml::sig("operator -> string -> (bool, string) Result.t ")] pub fn blocking_is_exist(operator: &mut Operator, path: String) -> Result { - map_res_error(operator.0.is_exist(path.as_str())) + map_res_error(operator.0.exists(path.as_str())) } #[ocaml::func] diff --git a/bindings/python/CONTRIBUTING.md b/bindings/python/CONTRIBUTING.md index 0cce0a8f0f63..e8c8dec0f5f6 100644 --- a/bindings/python/CONTRIBUTING.md +++ b/bindings/python/CONTRIBUTING.md @@ -46,7 +46,7 @@ After `venv` has been prepared, you can activate it by `source venv/bin/activate To simplify our work, we will utilize the tool [`maturin`](https://github.com/PyO3/maturin). Kindly install it beforehand. ```shell -pip install maturin[patchelf] +pip install 'maturin[patchelf]' ``` ## Build diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 9f418bd9a2dc..60164df7b85e 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -149,6 +149,8 @@ services-tikv = ["opendal/services-tikv"] services-upyun = ["opendal/services-upyun"] services-vercel-artifacts = ["opendal/services-vercel-artifacts"] services-yandex-disk = ["opendal/services-yandex-disk"] +# we build cp311-abi3 and cp310 wheels now, move this to pyo3 after we drop cp310 +abi3 = ["pyo3/abi3-py311"] [lib] crate-type = ["cdylib"] @@ -161,10 +163,8 @@ futures = "0.3.28" opendal = { version = ">=0", path = "../../core", features = [ "layers-blocking", ] } -pyo3 = { version = "0.22.5", features = ["abi3", "abi3-py311"] } -pyo3-async-runtimes = { version = "0.22.0", features = [ - "tokio-runtime", -] } +pyo3 = { version = "0.23.3", features = ["generate-import-lib"] } +pyo3-async-runtimes = { version = "0.23.0", features = ["tokio-runtime"] } tokio = "1" [target.'cfg(unix)'.dependencies.opendal] diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index fd58e48ff34a..728e042ed6bc 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -23,13 +23,14 @@ requires = ["maturin>=1.0,<2.0"] classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", ] description = "Apache OpenDAL™ Python Binding" license = { text = "Apache-2.0" } name = "opendal" readme = "README.md" -requires-python = ">=3.11" +requires-python = ">=3.10" + +dynamic = ['version'] [project.optional-dependencies] benchmark = [ diff --git a/bindings/python/python/opendal/__init__.pyi b/bindings/python/python/opendal/__init__.pyi index 2a2ae246b3f8..418d8841a662 100644 --- a/bindings/python/python/opendal/__init__.pyi +++ b/bindings/python/python/opendal/__init__.pyi @@ -204,4 +204,5 @@ class Capability: batch_delete: bool batch_max_operations: Optional[int] + shared: bool blocking: bool diff --git a/bindings/python/src/capability.rs b/bindings/python/src/capability.rs index 6cc76e153f4d..ac424ba698fc 100644 --- a/bindings/python/src/capability.rs +++ b/bindings/python/src/capability.rs @@ -105,6 +105,9 @@ pub struct Capability { /// The max operations that operator 
supports in batch. pub batch_max_operations: Option, + /// If operator supports shared. + pub shared: bool, + /// If operator supports blocking. pub blocking: bool, } @@ -147,6 +150,7 @@ impl Capability { batch: capability.batch, batch_delete: capability.batch_delete, batch_max_operations: capability.batch_max_operations, + shared: capability.shared, blocking: capability.blocking, } } diff --git a/bindings/python/src/file.rs b/bindings/python/src/file.rs index b23c6d2903cd..2c2172324bb7 100644 --- a/bindings/python/src/file.rs +++ b/bindings/python/src/file.rs @@ -32,6 +32,7 @@ use pyo3::buffer::PyBuffer; use pyo3::exceptions::PyIOError; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3::IntoPyObjectExt; use pyo3_async_runtimes::tokio::future_into_py; use tokio::sync::Mutex; @@ -350,6 +351,7 @@ impl AsyncFile { #[pymethods] impl AsyncFile { /// Read and return at most size bytes, or if size is not given, until EOF. + #[pyo3(signature = (size=None))] pub fn read<'p>(&'p self, py: Python<'p>, size: Option) -> PyResult> { let state = self.0.clone(); @@ -462,12 +464,13 @@ impl AsyncFile { } }; - let ret = reader + let pos = reader .seek(whence) .await .map_err(|err| PyIOError::new_err(err.to_string()))?; - Ok(Python::with_gil(|py| ret.into_py(py))) + Ok(pos) }) + .and_then(|pos| pos.into_bound_py_any(py)) } /// Return the current stream position. @@ -494,8 +497,9 @@ impl AsyncFile { .stream_position() .await .map_err(|err| PyIOError::new_err(err.to_string()))?; - Ok(Python::with_gil(|py| pos.into_py(py))) + Ok(pos) }) + .and_then(|pos| pos.into_bound_py_any(py)) } fn close<'p>(&'p mut self, py: Python<'p>) -> PyResult> { @@ -513,7 +517,7 @@ impl AsyncFile { } fn __aenter__<'a>(slf: PyRef<'a, Self>, py: Python<'a>) -> PyResult> { - let slf = slf.into_py(py); + let slf = slf.into_py_any(py)?; future_into_py(py, async move { Ok(slf) }) } diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index b499dc42aef2..578826add64d 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -70,7 +70,7 @@ pub use options::*; /// /// asyncio.run(main()) /// ``` -#[pymodule] +#[pymodule(gil_used = false)] fn _opendal(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; @@ -87,35 +87,29 @@ fn _opendal(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; // Layer module - let layers_module = PyModule::new_bound(py, "layers")?; + let layers_module = PyModule::new(py, "layers")?; layers_module.add_class::()?; layers_module.add_class::()?; layers_module.add_class::()?; m.add_submodule(&layers_module)?; - py.import_bound("sys")? + py.import("sys")? .getattr("modules")? 
.set_item("opendal.layers", layers_module)?; - let exception_module = PyModule::new_bound(py, "exceptions")?; - exception_module.add("Error", py.get_type_bound::())?; - exception_module.add("Unexpected", py.get_type_bound::())?; - exception_module.add("Unsupported", py.get_type_bound::())?; - exception_module.add("ConfigInvalid", py.get_type_bound::())?; - exception_module.add("NotFound", py.get_type_bound::())?; - exception_module.add( - "PermissionDenied", - py.get_type_bound::(), - )?; - exception_module.add("IsADirectory", py.get_type_bound::())?; - exception_module.add("NotADirectory", py.get_type_bound::())?; - exception_module.add("AlreadyExists", py.get_type_bound::())?; - exception_module.add("IsSameFile", py.get_type_bound::())?; - exception_module.add( - "ConditionNotMatch", - py.get_type_bound::(), - )?; + let exception_module = PyModule::new(py, "exceptions")?; + exception_module.add("Error", py.get_type::())?; + exception_module.add("Unexpected", py.get_type::())?; + exception_module.add("Unsupported", py.get_type::())?; + exception_module.add("ConfigInvalid", py.get_type::())?; + exception_module.add("NotFound", py.get_type::())?; + exception_module.add("PermissionDenied", py.get_type::())?; + exception_module.add("IsADirectory", py.get_type::())?; + exception_module.add("NotADirectory", py.get_type::())?; + exception_module.add("AlreadyExists", py.get_type::())?; + exception_module.add("IsSameFile", py.get_type::())?; + exception_module.add("ConditionNotMatch", py.get_type::())?; m.add_submodule(&exception_module)?; - py.import_bound("sys")? + py.import("sys")? .getattr("modules")? .set_item("opendal.exceptions", exception_module)?; Ok(()) diff --git a/bindings/python/src/lister.rs b/bindings/python/src/lister.rs index e2074016fae3..6019689de282 100644 --- a/bindings/python/src/lister.rs +++ b/bindings/python/src/lister.rs @@ -19,7 +19,7 @@ use std::sync::Arc; use futures::TryStreamExt; use pyo3::exceptions::PyStopAsyncIteration; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use pyo3_async_runtimes::tokio::future_into_py; use tokio::sync::Mutex; @@ -42,7 +42,7 @@ impl BlockingLister { } fn __next__(mut slf: PyRefMut<'_, Self>) -> PyResult> { match slf.0.next() { - Some(Ok(entry)) => Ok(Some(Entry::new(entry).into_py(slf.py()))), + Some(Ok(entry)) => Ok(Some(Entry::new(entry).into_py_any(slf.py())?)), Some(Err(err)) => { let pyerr = format_pyerr(err); Err(pyerr) @@ -72,10 +72,17 @@ impl AsyncLister { let mut lister = lister.lock().await; let entry = lister.try_next().await.map_err(format_pyerr)?; match entry { - Some(entry) => Ok(Python::with_gil(|py| Entry::new(entry).into_py(py))), + Some(entry) => Python::with_gil(|py| { + let py_obj = Entry::new(entry).into_py_any(py)?; + Ok(Some(py_obj)) + }), None => Err(PyStopAsyncIteration::new_err("stream exhausted")), } - })?; - Ok(Some(fut.into())) + }); + + match fut { + Ok(fut) => Ok(Some(fut.into())), + Err(e) => Err(e), + } } } diff --git a/bindings/python/src/operator.rs b/bindings/python/src/operator.rs index 0e6c51ce1071..76cdb09f8653 100644 --- a/bindings/python/src/operator.rs +++ b/bindings/python/src/operator.rs @@ -22,6 +22,8 @@ use std::time::Duration; use pyo3::prelude::*; use pyo3::types::PyBytes; use pyo3::types::PyDict; +use pyo3::types::PyTuple; +use pyo3::IntoPyObjectExt; use pyo3_async_runtimes::tokio::future_into_py; use crate::*; @@ -45,7 +47,11 @@ fn build_operator( /// /// Create a new blocking `Operator` with the given `scheme` and options(`**kwargs`). 
#[pyclass(module = "opendal")] -pub struct Operator(ocore::BlockingOperator); +pub struct Operator { + core: ocore::BlockingOperator, + __scheme: ocore::Scheme, + __map: HashMap, +} #[pymethods] impl Operator { @@ -65,18 +71,26 @@ impl Operator { }) .unwrap_or_default(); - Ok(Operator(build_operator(scheme, map)?.blocking())) + Ok(Operator { + core: build_operator(scheme.clone(), map.clone())?.blocking(), + __scheme: scheme, + __map: map, + }) } /// Add new layers upon existing operator pub fn layer(&self, layer: &layers::Layer) -> PyResult { - let op = layer.0.layer(self.0.clone().into()); - Ok(Self(op.blocking())) + let op = layer.0.layer(self.core.clone().into()); + Ok(Self { + core: op.blocking(), + __scheme: self.__scheme.clone(), + __map: self.__map.clone(), + }) } /// Open a file-like reader for the given path. pub fn open(&self, path: String, mode: String) -> PyResult { - let this = self.0.clone(); + let this = self.core.clone(); if mode == "rb" { let r = this .reader(&path) @@ -96,7 +110,7 @@ impl Operator { /// Read the whole path into bytes. pub fn read<'p>(&'p self, py: Python<'p>, path: &str) -> PyResult> { - let buffer = self.0.read(path).map_err(format_pyerr)?.to_vec(); + let buffer = self.core.read(path).map_err(format_pyerr)?.to_vec(); Buffer::new(buffer).into_bytes_ref(py) } @@ -104,7 +118,7 @@ impl Operator { #[pyo3(signature = (path, bs, **kwargs))] pub fn write(&self, path: &str, bs: Vec, kwargs: Option) -> PyResult<()> { let kwargs = kwargs.unwrap_or_default(); - let mut write = self.0.write_with(path, bs).append(kwargs.append); + let mut write = self.core.write_with(path, bs).append(kwargs.append); if let Some(chunk) = kwargs.chunk { write = write.chunk(chunk); } @@ -123,22 +137,25 @@ impl Operator { /// Get current path's metadata **without cache** directly. pub fn stat(&self, path: &str) -> PyResult { - self.0.stat(path).map_err(format_pyerr).map(Metadata::new) + self.core + .stat(path) + .map_err(format_pyerr) + .map(Metadata::new) } /// Copy source to target. pub fn copy(&self, source: &str, target: &str) -> PyResult<()> { - self.0.copy(source, target).map_err(format_pyerr) + self.core.copy(source, target).map_err(format_pyerr) } /// Rename filename. pub fn rename(&self, source: &str, target: &str) -> PyResult<()> { - self.0.rename(source, target).map_err(format_pyerr) + self.core.rename(source, target).map_err(format_pyerr) } /// Remove all file pub fn remove_all(&self, path: &str) -> PyResult<()> { - self.0.remove_all(path).map_err(format_pyerr) + self.core.remove_all(path).map_err(format_pyerr) } /// Create a dir at given path. @@ -154,7 +171,7 @@ impl Operator { /// - Create on existing dir will succeed. /// - Create dir is always recursive, works like `mkdir -p` pub fn create_dir(&self, path: &str) -> PyResult<()> { - self.0.create_dir(path).map_err(format_pyerr) + self.core.create_dir(path).map_err(format_pyerr) } /// Delete given path. @@ -163,19 +180,19 @@ impl Operator { /// /// - Delete not existing error won't return errors. pub fn delete(&self, path: &str) -> PyResult<()> { - self.0.delete(path).map_err(format_pyerr) + self.core.delete(path).map_err(format_pyerr) } /// List current dir path. pub fn list(&self, path: &str) -> PyResult { - let l = self.0.lister(path).map_err(format_pyerr)?; + let l = self.core.lister(path).map_err(format_pyerr)?; Ok(BlockingLister::new(l)) } /// List dir in flat way. 
pub fn scan(&self, path: &str) -> PyResult { let l = self - .0 + .core .lister_with(path) .recursive(true) .call() @@ -184,15 +201,21 @@ impl Operator { } pub fn capability(&self) -> PyResult { - Ok(capability::Capability::new(self.0.info().full_capability())) + Ok(capability::Capability::new( + self.core.info().full_capability(), + )) } pub fn to_async_operator(&self) -> PyResult { - Ok(AsyncOperator(self.0.clone().into())) + Ok(AsyncOperator { + core: self.core.clone().into(), + __scheme: self.__scheme.clone(), + __map: self.__map.clone(), + }) } fn __repr__(&self) -> String { - let info = self.0.info(); + let info = self.core.info(); let name = info.name(); if name.is_empty() { format!("Operator(\"{}\", root=\"{}\")", info.scheme(), info.root()) @@ -204,13 +227,24 @@ impl Operator { ) } } + + fn __getnewargs_ex__(&self, py: Python) -> PyResult { + let args = vec![self.__scheme.to_string()]; + let args = PyTuple::new(py, args)?.into_py_any(py)?; + let kwargs = self.__map.clone().into_py_any(py)?; + Ok(PyTuple::new(py, [args, kwargs])?.into_py_any(py)?) + } } /// `AsyncOperator` is the entry for all public async APIs /// /// Create a new `AsyncOperator` with the given `scheme` and options(`**kwargs`). #[pyclass(module = "opendal")] -pub struct AsyncOperator(ocore::Operator); +pub struct AsyncOperator { + core: ocore::Operator, + __scheme: ocore::Scheme, + __map: HashMap, +} #[pymethods] impl AsyncOperator { @@ -230,13 +264,21 @@ impl AsyncOperator { }) .unwrap_or_default(); - Ok(AsyncOperator(build_operator(scheme, map)?)) + Ok(AsyncOperator { + core: build_operator(scheme.clone(), map.clone())?.into(), + __scheme: scheme, + __map: map, + }) } /// Add new layers upon existing operator pub fn layer(&self, layer: &layers::Layer) -> PyResult { - let op = layer.0.layer(self.0.clone()); - Ok(Self(op)) + let op = layer.0.layer(self.core.clone()); + Ok(Self { + core: op, + __scheme: self.__scheme.clone(), + __map: self.__map.clone(), + }) } /// Open a file-like reader for the given path. @@ -246,7 +288,7 @@ impl AsyncOperator { path: String, mode: String, ) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { if mode == "rb" { @@ -271,7 +313,7 @@ impl AsyncOperator { /// Read the whole path into bytes. pub fn read<'p>(&'p self, py: Python<'p>, path: String) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { let res: Vec = this.read(&path).await.map_err(format_pyerr)?.to_vec(); Python::with_gil(|py| Buffer::new(res).into_bytes(py)) @@ -288,7 +330,7 @@ impl AsyncOperator { kwargs: Option, ) -> PyResult> { let kwargs = kwargs.unwrap_or_default(); - let this = self.0.clone(); + let this = self.core.clone(); let bs = bs.as_bytes().to_vec(); future_into_py(py, async move { let mut write = this.write_with(&path, bs).append(kwargs.append); @@ -310,7 +352,7 @@ impl AsyncOperator { /// Get current path's metadata **without cache** directly. 
pub fn stat<'p>(&'p self, py: Python<'p>, path: String) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { let res: Metadata = this .stat(&path) @@ -329,7 +371,7 @@ impl AsyncOperator { source: String, target: String, ) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { this.copy(&source, &target).await.map_err(format_pyerr) }) @@ -342,7 +384,7 @@ impl AsyncOperator { source: String, target: String, ) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { this.rename(&source, &target).await.map_err(format_pyerr) }) @@ -350,7 +392,7 @@ impl AsyncOperator { /// Remove all file pub fn remove_all<'p>(&'p self, py: Python<'p>, path: String) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { this.remove_all(&path).await.map_err(format_pyerr) }) @@ -369,7 +411,7 @@ impl AsyncOperator { /// - Create on existing dir will succeed. /// - Create dir is always recursive, works like `mkdir -p` pub fn create_dir<'p>(&'p self, py: Python<'p>, path: String) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { this.create_dir(&path).await.map_err(format_pyerr) }) @@ -381,7 +423,7 @@ impl AsyncOperator { /// /// - Delete not existing error won't return errors. pub fn delete<'p>(&'p self, py: Python<'p>, path: String) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py( py, async move { this.delete(&path).await.map_err(format_pyerr) }, @@ -390,24 +432,26 @@ impl AsyncOperator { /// List current dir path. pub fn list<'p>(&'p self, py: Python<'p>, path: String) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { let lister = this.lister(&path).await.map_err(format_pyerr)?; - let pylister: PyObject = Python::with_gil(|py| AsyncLister::new(lister).into_py(py)); + let pylister = Python::with_gil(|py| AsyncLister::new(lister).into_py_any(py))?; + Ok(pylister) }) } /// List dir in flat way. 
pub fn scan<'p>(&'p self, py: Python<'p>, path: String) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { let lister = this .lister_with(&path) .recursive(true) .await .map_err(format_pyerr)?; - let pylister: PyObject = Python::with_gil(|py| AsyncLister::new(lister).into_py(py)); + let pylister: PyObject = + Python::with_gil(|py| AsyncLister::new(lister).into_py_any(py))?; Ok(pylister) }) } @@ -419,7 +463,7 @@ impl AsyncOperator { path: String, expire_second: u64, ) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { let res = this .presign_stat(&path, Duration::from_secs(expire_second)) @@ -438,7 +482,7 @@ impl AsyncOperator { path: String, expire_second: u64, ) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { let res = this .presign_read(&path, Duration::from_secs(expire_second)) @@ -457,7 +501,7 @@ impl AsyncOperator { path: String, expire_second: u64, ) -> PyResult> { - let this = self.0.clone(); + let this = self.core.clone(); future_into_py(py, async move { let res = this .presign_write(&path, Duration::from_secs(expire_second)) @@ -470,15 +514,21 @@ impl AsyncOperator { } pub fn capability(&self) -> PyResult { - Ok(capability::Capability::new(self.0.info().full_capability())) + Ok(capability::Capability::new( + self.core.info().full_capability(), + )) } pub fn to_operator(&self) -> PyResult { - Ok(Operator(self.0.clone().blocking())) + Ok(Operator { + core: self.core.clone().blocking(), + __scheme: self.__scheme.clone(), + __map: self.__map.clone(), + }) } fn __repr__(&self) -> String { - let info = self.0.info(); + let info = self.core.info(); let name = info.name(); if name.is_empty() { format!( @@ -494,6 +544,13 @@ impl AsyncOperator { ) } } + + fn __getnewargs_ex__(&self, py: Python) -> PyResult { + let args = vec![self.__scheme.to_string()]; + let args = PyTuple::new(py, args)?.into_py_any(py)?; + let kwargs = self.__map.clone().into_py_any(py)?; + Ok(PyTuple::new(py, [args, kwargs])?.into_py_any(py)?) + } } #[pyclass(module = "opendal")] diff --git a/bindings/python/src/utils.rs b/bindings/python/src/utils.rs index eb58e85ef8e2..9970b3304e4f 100644 --- a/bindings/python/src/utils.rs +++ b/bindings/python/src/utils.rs @@ -19,6 +19,7 @@ use std::os::raw::c_int; use pyo3::ffi; use pyo3::prelude::*; +use pyo3::IntoPyObjectExt; /// A bytes-like object that implements buffer protocol. #[pyclass(module = "opendal")] @@ -33,14 +34,14 @@ impl Buffer { /// Consume self to build a bytes pub fn into_bytes(self, py: Python) -> PyResult> { - let buffer = self.into_py(py); + let buffer = self.into_py_any(py)?; unsafe { PyObject::from_owned_ptr_or_err(py, ffi::PyBytes_FromObject(buffer.as_ptr())) } } /// Consume self to build a bytes pub fn into_bytes_ref(self, py: Python) -> PyResult> { - let buffer = self.into_py(py); + let buffer = self.into_py_any(py)?; let view = unsafe { Bound::from_owned_ptr_or_err(py, ffi::PyBytes_FromObject(buffer.as_ptr()))? }; diff --git a/bindings/python/tests/test_async_pickle_types.py b/bindings/python/tests/test_async_pickle_types.py new file mode 100644 index 000000000000..c163be3c28f1 --- /dev/null +++ b/bindings/python/tests/test_async_pickle_types.py @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +import pickle +from random import randint +from uuid import uuid4 +import os + + +@pytest.mark.asyncio +@pytest.mark.need_capability("read", "write", "delete", "shared") +async def test_operator_pickle(service_name, operator, async_operator): + """ + Test AsyncOperator's pickle serialization and deserialization. + """ + + size = randint(1, 1024) + filename = f"random_file_{str(uuid4())}" + content = os.urandom(size) + await async_operator.write(filename, content) + + serialized = pickle.dumps(async_operator) + + deserialized = pickle.loads(serialized) + assert await deserialized.read(filename) == content + + await async_operator.delete(filename) diff --git a/bindings/python/tests/test_pickle.py b/bindings/python/tests/test_pickle_rw.py similarity index 100% rename from bindings/python/tests/test_pickle.py rename to bindings/python/tests/test_pickle_rw.py diff --git a/bindings/python/tests/test_sync_pickle_types.py b/bindings/python/tests/test_sync_pickle_types.py new file mode 100644 index 000000000000..337ebfa6bdd0 --- /dev/null +++ b/bindings/python/tests/test_sync_pickle_types.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +import pickle +from random import randint +from uuid import uuid4 +import os + + +@pytest.mark.need_capability("read", "write", "delete", "shared") +def test_operator_pickle(service_name, operator, async_operator): + """ + Test Operator's pickle serialization and deserialization. 
+ """ + + size = randint(1, 1024) + filename = f"random_file_{str(uuid4())}" + content = os.urandom(size) + operator.write(filename, content) + + serialized = pickle.dumps(operator) + + deserialized = pickle.loads(serialized) + assert deserialized.read(filename) == content + + operator.delete(filename) diff --git a/bindings/ruby/src/capability.rs b/bindings/ruby/src/capability.rs index a2402dceb51c..32166bc0c45d 100644 --- a/bindings/ruby/src/capability.rs +++ b/bindings/ruby/src/capability.rs @@ -97,7 +97,8 @@ define_accessors!(Capability, { batch: bool, batch_delete: bool, batch_max_operations: Option, - blocking: bool + shared: bool, + blocking: bool, }); // includes class into the Ruby module diff --git a/core/src/layers/blocking.rs b/core/src/layers/blocking.rs index da6e7aee7e12..fc97a33703af 100644 --- a/core/src/layers/blocking.rs +++ b/core/src/layers/blocking.rs @@ -59,7 +59,7 @@ use crate::*; /// ## In async context with blocking functions /// /// If `BlockingLayer` is called in blocking function, please fetch a [`tokio::runtime::EnterGuard`] -/// first. You can use [`Handle::try_current`] first to get the handle and than call [`Handle::enter`]. +/// first. You can use [`Handle::try_current`] first to get the handle and then call [`Handle::enter`]. /// This often happens in the case that async function calls blocking function. /// /// ```rust,no_run diff --git a/core/src/layers/capability_check.rs b/core/src/layers/capability_check.rs new file mode 100644 index 000000000000..0066e031b3f1 --- /dev/null +++ b/core/src/layers/capability_check.rs @@ -0,0 +1,292 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::layers::correctness_check::new_unsupported_error; +use crate::raw::*; +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; + +/// Add an extra capability check layer for every operation +/// +/// Similar to `CorrectnessChecker`, Before performing any operations, this layer will first verify +/// its arguments against the capability of the underlying service. If the arguments is not supported, +/// an error will be returned directly. +/// +/// Notes +/// +/// There are two main differences between this checker with the `CorrectnessChecker`: +/// 1. This checker provides additional checks for capabilities like write_with_content_type and +/// list_with_version, among others. These capabilities do not affect data integrity, even if +/// the underlying storage services do not support them. +/// +/// 2. OpenDAL doesn't apply this checker by default. Users can enable this layer if they want to +/// enforce stricter requirements. 
+/// +/// # examples +/// +/// ```no_run +/// # use opendal::layers::CapabilityCheckLayer; +/// # use opendal::services; +/// # use opendal::Operator; +/// # use opendal::Result; +/// # use opendal::Scheme; +/// +/// # fn main() -> Result<()> { +/// use opendal::layers::CapabilityCheckLayer; +/// let _ = Operator::new(services::Memory::default())? +/// .layer(CapabilityCheckLayer) +/// .finish(); +/// Ok(()) +/// # } +/// ``` +#[derive(Default)] +pub struct CapabilityCheckLayer; + +impl Layer for CapabilityCheckLayer { + type LayeredAccess = CapabilityAccessor; + + fn layer(&self, inner: A) -> Self::LayeredAccess { + CapabilityAccessor { + info: inner.info(), + inner, + } + } +} +pub struct CapabilityAccessor { + info: Arc, + inner: A, +} + +impl Debug for CapabilityAccessor { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CapabilityCheckAccessor") + .field("inner", &self.inner) + .finish_non_exhaustive() + } +} + +impl LayeredAccess for CapabilityAccessor { + type Inner = A; + type Reader = A::Reader; + type BlockingReader = A::BlockingReader; + type Writer = A::Writer; + type BlockingWriter = A::BlockingWriter; + type Lister = A::Lister; + type BlockingLister = A::BlockingLister; + + fn inner(&self) -> &Self::Inner { + &self.inner + } + + async fn read(&self, path: &str, args: OpRead) -> crate::Result<(RpRead, Self::Reader)> { + self.inner.read(path, args).await + } + + async fn write(&self, path: &str, args: OpWrite) -> crate::Result<(RpWrite, Self::Writer)> { + let capability = self.info.full_capability(); + if !capability.write_with_content_type && args.content_type().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::Write, + "content_type", + )); + } + if !capability.write_with_cache_control && args.cache_control().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::Write, + "cache_control", + )); + } + if !capability.write_with_content_disposition && args.content_disposition().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::Write, + "content_disposition", + )); + } + + self.inner.write(path, args).await + } + + async fn list(&self, path: &str, args: OpList) -> crate::Result<(RpList, Self::Lister)> { + let capability = self.info.full_capability(); + if !capability.list_with_version && args.version() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::List, + "version", + )); + } + + self.inner.list(path, args).await + } + + fn blocking_read( + &self, + path: &str, + args: OpRead, + ) -> crate::Result<(RpRead, Self::BlockingReader)> { + self.inner().blocking_read(path, args) + } + + fn blocking_write( + &self, + path: &str, + args: OpWrite, + ) -> crate::Result<(RpWrite, Self::BlockingWriter)> { + let capability = self.info.full_capability(); + if !capability.write_with_content_type && args.content_type().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::BlockingWrite, + "content_type", + )); + } + if !capability.write_with_cache_control && args.cache_control().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::BlockingWrite, + "cache_control", + )); + } + if !capability.write_with_content_disposition && args.content_disposition().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::BlockingWrite, + "content_disposition", + )); + } + + self.inner.blocking_write(path, args) + } + + fn blocking_list( + &self, + path: &str, + args: OpList, + ) 
-> crate::Result<(RpList, Self::BlockingLister)> { + let capability = self.info.full_capability(); + if !capability.list_with_version && args.version() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::BlockingList, + "version", + )); + } + + self.inner.blocking_list(path, args) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Capability, ErrorKind, Operator}; + + #[derive(Debug)] + struct MockService { + capability: Capability, + } + + impl Access for MockService { + type Reader = oio::Reader; + type Writer = oio::Writer; + type Lister = oio::Lister; + type BlockingReader = oio::BlockingReader; + type BlockingWriter = oio::BlockingWriter; + type BlockingLister = oio::BlockingLister; + + fn info(&self) -> Arc { + let mut info = AccessorInfo::default(); + info.set_native_capability(self.capability); + + info.into() + } + + async fn write(&self, _: &str, _: OpWrite) -> crate::Result<(RpWrite, Self::Writer)> { + Ok((RpWrite::new(), Box::new(()))) + } + + async fn list(&self, _: &str, _: OpList) -> crate::Result<(RpList, Self::Lister)> { + Ok((RpList {}, Box::new(()))) + } + } + + fn new_test_operator(capability: Capability) -> Operator { + let srv = MockService { capability }; + + Operator::from_inner(Arc::new(srv)).layer(CapabilityCheckLayer) + } + + #[tokio::test] + async fn test_writer_with() { + let op = new_test_operator(Capability { + write: true, + ..Default::default() + }); + let res = op.writer_with("path").content_type("type").await; + assert!(res.is_err()); + + let res = op.writer_with("path").cache_control("cache").await; + assert!(res.is_err()); + + let res = op + .writer_with("path") + .content_disposition("disposition") + .await; + assert!(res.is_err()); + + let op = new_test_operator(Capability { + write: true, + write_with_content_type: true, + write_with_cache_control: true, + write_with_content_disposition: true, + ..Default::default() + }); + let res = op.writer_with("path").content_type("type").await; + assert!(res.is_ok()); + + let res = op.writer_with("path").cache_control("cache").await; + assert!(res.is_ok()); + + let res = op + .writer_with("path") + .content_disposition("disposition") + .await; + assert!(res.is_ok()); + } + + #[tokio::test] + async fn test_list_with() { + let op = new_test_operator(Capability { + list: true, + ..Default::default() + }); + let res = op.list_with("path/").version(true).await; + assert!(res.is_err()); + assert_eq!(res.unwrap_err().kind(), ErrorKind::Unsupported); + + let op = new_test_operator(Capability { + list: true, + list_with_version: true, + ..Default::default() + }); + let res = op.lister_with("path/").version(true).await; + assert!(res.is_ok()) + } +} diff --git a/core/src/layers/complete.rs b/core/src/layers/complete.rs index eb93fd4d3361..92adee9eba0c 100644 --- a/core/src/layers/complete.rs +++ b/core/src/layers/complete.rs @@ -98,11 +98,6 @@ use crate::*; /// - If support `list_with_recursive`, return directly. /// - if not, wrap with [`FlatLister`]. /// -/// ## Capability Check -/// -/// Before performing any operations, `CompleteLayer` will first check -/// the operation against capability of the underlying service. If the -/// operation is not supported, an error will be returned directly. 
pub struct CompleteLayer; impl Layer for CompleteLayer { @@ -129,28 +124,19 @@ impl Debug for CompleteAccessor { } impl CompleteAccessor { - fn new_unsupported_error(&self, op: impl Into<&'static str>) -> Error { - let scheme = self.info.scheme(); - let op = op.into(); - Error::new( - ErrorKind::Unsupported, - format!("service {scheme} doesn't support operation {op}"), - ) - .with_operation(op) - } - async fn complete_create_dir(&self, path: &str, args: OpCreateDir) -> Result { let capability = self.info.full_capability(); if capability.create_dir { return self.inner().create_dir(path, args).await; } + if capability.write_can_empty && capability.list { let (_, mut w) = self.inner.write(path, OpWrite::default()).await?; oio::Write::close(&mut w).await?; return Ok(RpCreateDir::default()); } - Err(self.new_unsupported_error(Operation::CreateDir)) + self.inner.create_dir(path, args).await } fn complete_blocking_create_dir(&self, path: &str, args: OpCreateDir) -> Result { @@ -158,20 +144,18 @@ impl CompleteAccessor { if capability.create_dir && capability.blocking { return self.inner().blocking_create_dir(path, args); } + if capability.write_can_empty && capability.list && capability.blocking { let (_, mut w) = self.inner.blocking_write(path, OpWrite::default())?; oio::BlockingWrite::close(&mut w)?; return Ok(RpCreateDir::default()); } - Err(self.new_unsupported_error(Operation::BlockingCreateDir)) + self.inner.blocking_create_dir(path, args) } async fn complete_stat(&self, path: &str, args: OpStat) -> Result { let capability = self.info.full_capability(); - if !capability.stat { - return Err(self.new_unsupported_error(Operation::Stat)); - } if path == "/" { return Ok(RpStat::new(Metadata::new(EntryMode::DIR))); @@ -188,7 +172,7 @@ impl CompleteAccessor { )); } - return Ok(RpStat::new(Metadata::new(EntryMode::DIR))); + return Ok(RpStat::new(meta)); } // Otherwise, we can simulate stat dir via `list`. 
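The fallback named in the comment above ("simulate stat dir via `list`") can be approximated with the public API: if listing a directory path yields any entry, the directory exists and DIR metadata can be synthesized. A simplified sketch of that idea; the real `complete_stat` works on raw accessors and also handles the root path and trailing-slash rules:

```rust
use futures::TryStreamExt;
use opendal::{EntryMode, Error, ErrorKind, Metadata, Operator, Result};

// Answer stat("dir/") for backends without native directory stat:
// one listed entry is proof enough to report an existing directory.
async fn simulated_stat_dir(op: &Operator, path: &str) -> Result<Metadata> {
    let mut lister = op.lister(path).await?;
    match lister.try_next().await? {
        Some(_) => Ok(Metadata::new(EntryMode::DIR)),
        None => Err(Error::new(ErrorKind::NotFound, "directory not found")),
    }
}
```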
@@ -214,9 +198,6 @@ impl CompleteAccessor { fn complete_blocking_stat(&self, path: &str, args: OpStat) -> Result { let capability = self.info.full_capability(); - if !capability.stat { - return Err(self.new_unsupported_error(Operation::Stat)); - } if path == "/" { return Ok(RpStat::new(Metadata::new(EntryMode::DIR))); @@ -262,9 +243,6 @@ impl CompleteAccessor { args: OpList, ) -> Result<(RpList, CompleteLister)> { let cap = self.info.full_capability(); - if !cap.list { - return Err(self.new_unsupported_error(Operation::List)); - } let recursive = args.recursive(); @@ -310,9 +288,6 @@ impl CompleteAccessor { args: OpList, ) -> Result<(RpList, CompleteLister)> { let cap = self.info.full_capability(); - if !cap.list { - return Err(self.new_unsupported_error(Operation::BlockingList)); - } let recursive = args.recursive(); @@ -383,11 +358,6 @@ impl LayeredAccess for CompleteAccessor { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let capability = self.info.full_capability(); - if !capability.read { - return Err(self.new_unsupported_error(Operation::Read)); - } - let size = args.range().size(); self.inner .read(path, args) @@ -396,62 +366,20 @@ impl LayeredAccess for CompleteAccessor { } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { - let capability = self.info.full_capability(); - if !capability.write { - return Err(self.new_unsupported_error(Operation::Write)); - } - if args.append() && !capability.write_can_append { - return Err(Error::new( - ErrorKind::Unsupported, - format!( - "service {} doesn't support operation write with append", - self.info.scheme() - ), - )); - } - let (rp, w) = self.inner.write(path, args.clone()).await?; let w = CompleteWriter::new(w); Ok((rp, w)) } - async fn copy(&self, from: &str, to: &str, args: OpCopy) -> Result { - let capability = self.info.full_capability(); - if !capability.copy { - return Err(self.new_unsupported_error(Operation::Copy)); - } - - self.inner().copy(from, to, args).await - } - - async fn rename(&self, from: &str, to: &str, args: OpRename) -> Result { - let capability = self.info.full_capability(); - if !capability.rename { - return Err(self.new_unsupported_error(Operation::Rename)); - } - - self.inner().rename(from, to, args).await - } - async fn stat(&self, path: &str, args: OpStat) -> Result { self.complete_stat(path, args).await } async fn delete(&self) -> Result<(RpDelete, Self::Deleter)> { - let capability = self.info.full_capability(); - if !capability.delete { - return Err(self.new_unsupported_error(Operation::Delete)); - } - self.inner().delete().await } async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { - let capability = self.info.full_capability(); - if !capability.list { - return Err(self.new_unsupported_error(Operation::List)); - } - self.complete_list(path, args).await } @@ -469,11 +397,6 @@ impl LayeredAccess for CompleteAccessor { } fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - let capability = self.info.full_capability(); - if !capability.read || !capability.blocking { - return Err(self.new_unsupported_error(Operation::Read)); - } - let size = args.range().size(); self.inner .blocking_read(path, args) @@ -481,63 +404,20 @@ impl LayeredAccess for CompleteAccessor { } fn blocking_write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { - let capability = self.info.full_capability(); - if !capability.write || !capability.blocking { - return 
Err(self.new_unsupported_error(Operation::BlockingWrite)); - } - - if args.append() && !capability.write_can_append { - return Err(Error::new( - ErrorKind::Unsupported, - format!( - "service {} doesn't support operation write with append", - self.info.scheme() - ), - )); - } - self.inner .blocking_write(path, args) .map(|(rp, w)| (rp, CompleteWriter::new(w))) } - fn blocking_copy(&self, from: &str, to: &str, args: OpCopy) -> Result { - let capability = self.info.full_capability(); - if !capability.copy || !capability.blocking { - return Err(self.new_unsupported_error(Operation::BlockingCopy)); - } - - self.inner().blocking_copy(from, to, args) - } - - fn blocking_rename(&self, from: &str, to: &str, args: OpRename) -> Result { - let capability = self.info.full_capability(); - if !capability.rename || !capability.blocking { - return Err(self.new_unsupported_error(Operation::BlockingRename)); - } - - self.inner().blocking_rename(from, to, args) - } - fn blocking_stat(&self, path: &str, args: OpStat) -> Result { self.complete_blocking_stat(path, args) } fn blocking_delete(&self) -> Result<(RpDelete, Self::BlockingDeleter)> { - let capability = self.info.full_capability(); - if !capability.delete || !capability.blocking { - return Err(self.new_unsupported_error(Operation::BlockingDelete)); - } - self.inner().blocking_delete() } fn blocking_list(&self, path: &str, args: OpList) -> Result<(RpList, Self::BlockingLister)> { - let capability = self.info.full_capability(); - if !capability.list || !capability.blocking { - return Err(self.new_unsupported_error(Operation::BlockingList)); - } - self.complete_blocking_list(path, args) } } diff --git a/core/src/layers/correctness_check.rs b/core/src/layers/correctness_check.rs new file mode 100644 index 000000000000..a2368d54d154 --- /dev/null +++ b/core/src/layers/correctness_check.rs @@ -0,0 +1,378 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; + +use crate::raw::*; +use crate::*; + +/// Add a correctness capability check layer for every operation +/// +/// Before performing any operations, we will first verify the operation and its critical arguments +/// against the capability of the underlying service. If the operation or arguments is not supported, +/// an error will be returned directly. +/// +/// # Notes +/// +/// OpenDAL applies this checker to every accessor by default, so users don't need to invoke it manually. +/// this checker ensures the operation and its critical arguments, which might affect the correctness of +/// the call, are supported by the underlying service. +/// +/// for example, when calling `write_with_append`, but `append` is not supported by the underlying +/// service, an `Unsupported` error is returned. 
without this check, undesired data may be written. +#[derive(Default)] +pub struct CorrectnessCheckLayer; + +impl Layer for CorrectnessCheckLayer { + type LayeredAccess = CorrectnessAccessor; + + fn layer(&self, inner: A) -> Self::LayeredAccess { + CorrectnessAccessor { + info: inner.info(), + inner, + } + } +} + +pub(crate) fn new_unsupported_error(info: &AccessorInfo, op: Operation, args: &str) -> Error { + let scheme = info.scheme(); + let op = op.into_static(); + + Error::new( + ErrorKind::Unsupported, + format!("service {scheme} doesn't support operation {op} with args {args}"), + ) + .with_operation(op) +} + +pub struct CorrectnessAccessor { + info: Arc, + inner: A, +} + +impl Debug for CorrectnessAccessor { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CorrectnessCheckAccessor") + .field("inner", &self.inner) + .finish_non_exhaustive() + } +} + +impl LayeredAccess for CorrectnessAccessor { + type Inner = A; + type Reader = A::Reader; + type BlockingReader = A::BlockingReader; + type Writer = A::Writer; + type BlockingWriter = A::BlockingWriter; + type Lister = A::Lister; + type BlockingLister = A::BlockingLister; + + fn inner(&self) -> &Self::Inner { + &self.inner + } + + fn info(&self) -> Arc { + self.info.clone() + } + + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + let capability = self.info.full_capability(); + if !capability.read_with_version && args.version().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::Read, + "version", + )); + } + + self.inner.read(path, args).await + } + + async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { + let capability = self.info.full_capability(); + if args.append() && !capability.write_can_append { + return Err(new_unsupported_error( + &self.info, + Operation::Write, + "append", + )); + } + if args.if_not_exists() && !capability.write_with_if_not_exists { + return Err(new_unsupported_error( + &self.info, + Operation::Write, + "if_not_exists", + )); + } + if args.if_none_match().is_some() && !capability.write_with_if_none_match { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::Write, + "if_none_match", + )); + } + + self.inner.write(path, args).await + } + + async fn stat(&self, path: &str, args: OpStat) -> Result { + let capability = self.info.full_capability(); + if !capability.stat_with_version && args.version().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::Stat, + "version", + )); + } + + self.inner.stat(path, args).await + } + + async fn delete(&self, path: &str, args: OpDelete) -> Result { + let capability = self.info.full_capability(); + if !capability.delete_with_version && args.version().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::Delete, + "version", + )); + } + + self.inner.delete(path, args).await + } + + async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { + self.inner.list(path, args).await + } + + fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { + let capability = self.info.full_capability(); + if !capability.read_with_version && args.version().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::BlockingRead, + "version", + )); + } + + self.inner.blocking_read(path, args) + } + + fn blocking_write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { + let capability 
= self.info.full_capability(); + if args.append() && !capability.write_can_append { + return Err(new_unsupported_error( + &self.info, + Operation::BlockingWrite, + "append", + )); + } + if args.if_not_exists() && !capability.write_with_if_not_exists { + return Err(new_unsupported_error( + &self.info, + Operation::BlockingWrite, + "if_not_exists", + )); + } + if args.if_none_match().is_some() && !capability.write_with_if_none_match { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::BlockingWrite, + "if_none_match", + )); + } + + self.inner.blocking_write(path, args) + } + + fn blocking_stat(&self, path: &str, args: OpStat) -> Result { + let capability = self.info.full_capability(); + if !capability.stat_with_version && args.version().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::BlockingStat, + "version", + )); + } + + self.inner.blocking_stat(path, args) + } + + fn blocking_delete(&self, path: &str, args: OpDelete) -> Result { + let capability = self.info.full_capability(); + if !capability.delete_with_version && args.version().is_some() { + return Err(new_unsupported_error( + self.info.as_ref(), + Operation::BlockingDelete, + "version", + )); + } + + self.inner().blocking_delete(path, args) + } + + fn blocking_list(&self, path: &str, args: OpList) -> Result<(RpList, Self::BlockingLister)> { + self.inner.blocking_list(path, args) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::raw::oio; + use crate::{Capability, EntryMode, Metadata, Operator}; + + #[derive(Debug)] + struct MockService { + capability: Capability, + } + + impl Access for MockService { + type Reader = oio::Reader; + type Writer = oio::Writer; + type Lister = oio::Lister; + type BlockingReader = oio::BlockingReader; + type BlockingWriter = oio::BlockingWriter; + type BlockingLister = oio::BlockingLister; + + fn info(&self) -> Arc { + let mut info = AccessorInfo::default(); + info.set_native_capability(self.capability); + + info.into() + } + + async fn stat(&self, _: &str, _: OpStat) -> Result { + Ok(RpStat::new(Metadata::new(EntryMode::Unknown))) + } + + async fn read(&self, _: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { + Ok((RpRead::new(), Box::new(bytes::Bytes::new()))) + } + + async fn write(&self, _: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { + Ok((RpWrite::new(), Box::new(()))) + } + + async fn delete(&self, _: &str, _: OpDelete) -> Result { + Ok(RpDelete {}) + } + + async fn list(&self, _: &str, _: OpList) -> Result<(RpList, Self::Lister)> { + Ok((RpList {}, Box::new(()))) + } + } + + fn new_test_operator(capability: Capability) -> Operator { + let srv = MockService { capability }; + + Operator::from_inner(Arc::new(srv)).layer(CorrectnessCheckLayer) + } + + #[tokio::test] + async fn test_read() { + let op = new_test_operator(Capability { + read: true, + ..Default::default() + }); + let res = op.read_with("path").version("version").await; + assert!(res.is_err()); + assert_eq!(res.unwrap_err().kind(), ErrorKind::Unsupported); + + let op = new_test_operator(Capability { + read: true, + read_with_version: true, + ..Default::default() + }); + let res = op.read_with("path").version("version").await; + assert!(res.is_ok()); + } + + #[tokio::test] + async fn test_stat() { + let op = new_test_operator(Capability { + stat: true, + ..Default::default() + }); + let res = op.stat_with("path").version("version").await; + assert!(res.is_err()); + assert_eq!(res.unwrap_err().kind(), ErrorKind::Unsupported); + + let op = 
new_test_operator(Capability { + stat: true, + stat_with_version: true, + ..Default::default() + }); + let res = op.stat_with("path").version("version").await; + assert!(res.is_ok()); + } + + #[tokio::test] + async fn test_write_with() { + let op = new_test_operator(Capability { + write: true, + ..Default::default() + }); + let res = op.write_with("path", "".as_bytes()).append(true).await; + assert!(res.is_err()); + assert_eq!(res.unwrap_err().kind(), ErrorKind::Unsupported); + + let res = op + .write_with("path", "".as_bytes()) + .if_not_exists(true) + .await; + assert!(res.is_err()); + assert_eq!(res.unwrap_err().kind(), ErrorKind::Unsupported); + + let res = op + .write_with("path", "".as_bytes()) + .if_none_match("etag") + .await; + assert!(res.is_err()); + assert_eq!(res.unwrap_err().kind(), ErrorKind::Unsupported); + + let op = new_test_operator(Capability { + write: true, + write_can_append: true, + write_with_if_not_exists: true, + write_with_if_none_match: true, + ..Default::default() + }); + let res = op.writer_with("path").append(true).await; + assert!(res.is_ok()); + } + + #[tokio::test] + async fn test_delete() { + let op = new_test_operator(Capability { + delete: true, + ..Default::default() + }); + let res = op.delete_with("path").version("version").await; + assert!(res.is_err()); + assert_eq!(res.unwrap_err().kind(), ErrorKind::Unsupported); + + let op = new_test_operator(Capability { + delete: true, + delete_with_version: true, + ..Default::default() + }); + let res = op.delete_with("path").version("version").await; + assert!(res.is_ok()) + } +} diff --git a/core/src/layers/logging.rs b/core/src/layers/logging.rs index a2b109399380..f7723bc90290 100644 --- a/core/src/layers/logging.rs +++ b/core/src/layers/logging.rs @@ -230,7 +230,7 @@ impl LoggingInterceptor for DefaultLoggingInterceptor { struct LoggingContext<'a>(&'a [(&'a str, &'a str)]); -impl<'a> Display for LoggingContext<'a> { +impl Display for LoggingContext<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { for (i, (k, v)) in self.0.iter().enumerate() { if i > 0 { diff --git a/core/src/layers/metrics.rs b/core/src/layers/metrics.rs index 94ec8ba7ad54..8e577536d894 100644 --- a/core/src/layers/metrics.rs +++ b/core/src/layers/metrics.rs @@ -176,7 +176,7 @@ struct OperationLabels<'a> { error: Option, } -impl<'a> OperationLabels<'a> { +impl OperationLabels<'_> { /// labels: /// /// 1. `["scheme", "namespace", "root", "operation"]` diff --git a/core/src/layers/mod.rs b/core/src/layers/mod.rs index c43db2331aff..0ac8f5f67abe 100644 --- a/core/src/layers/mod.rs +++ b/core/src/layers/mod.rs @@ -113,3 +113,8 @@ mod dtrace; pub use self::dtrace::DtraceLayer; pub mod observe; + +mod correctness_check; +pub(crate) use correctness_check::CorrectnessCheckLayer; +mod capability_check; +pub use capability_check::CapabilityCheckLayer; diff --git a/core/src/layers/timeout.rs b/core/src/layers/timeout.rs index bd2c8d63b31c..2f34231222a2 100644 --- a/core/src/layers/timeout.rs +++ b/core/src/layers/timeout.rs @@ -107,9 +107,9 @@ use crate::*; /// This might introduce a bit overhead for IO operations, but it's the only way to implement /// timeout correctly. We used to implement timeout layer in zero cost way that only stores /// a [`std::time::Instant`] and check the timeout by comparing the instant with current time. -/// However, it doesn't works for all cases. +/// However, it doesn't work for all cases. 
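Stepping back to the `CorrectnessCheckLayer` registered in `mod.rs` above: the sketch below shows what its default application means for users. This is a hedged illustration, not code from this PR; it assumes the layer is applied automatically by `Operator` (as the layer docs state) and that the `memory` service lacks `read_with_version` and `write_with_if_not_exists`, consistent with its capability set in this diff.

```rust
use opendal::services::Memory;
use opendal::{ErrorKind, Operator};

#[tokio::main]
async fn main() -> opendal::Result<()> {
    let op = Operator::new(Memory::default())?.finish();

    // Memory can read, but not read a specific version: the correctness
    // check layer rejects the call before any backend work happens.
    let res = op.read_with("path").version("v1").await;
    assert_eq!(res.unwrap_err().kind(), ErrorKind::Unsupported);

    // Likewise for a conditional write on a service without
    // `write_with_if_not_exists` support.
    let res = op
        .write_with("path", "data".as_bytes())
        .if_not_exists(true)
        .await;
    assert_eq!(res.unwrap_err().kind(), ErrorKind::Unsupported);

    Ok(())
}
```

Without the layer, such calls could silently misbehave (for example, an "append" that actually overwrites); with it, they fail fast with `Unsupported`.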
/// -/// For examples, users TCP connection could be in [Busy ESTAB](https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die) state. In this state, no IO event will be emit. The runtime +/// For example, a user's TCP connection could be in [Busy ESTAB](https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die) state. In this state, no IO event will be emitted. The runtime /// will never poll our future again. From the application side, this future is hanging forever /// until this TCP connection is closed for reaching the linux [net.ipv4.tcp_retries2](https://man7.org/linux/man-pages/man7/tcp.7.html) times. #[derive(Clone)] diff --git a/core/src/raw/accessor.rs index 7e59f6df3f4c..bc9c40a4a98c 100644 --- a/core/src/raw/accessor.rs +++ b/core/src/raw/accessor.rs @@ -80,7 +80,7 @@ pub trait Access: Send + Sync + Debug + Unpin + 'static { /// This function is required to be implemented. /// /// By returning AccessorInfo, underlying services can declare - /// some useful information about it self. + /// some useful information about itself. /// /// - scheme: declare the scheme of backend. /// - capabilities: declare the capabilities of current backend. diff --git a/core/src/raw/adapters/typed_kv/api.rs index f1e4a95fc47a..a3d0490845b8 100644 --- a/core/src/raw/adapters/typed_kv/api.rs +++ b/core/src/raw/adapters/typed_kv/api.rs @@ -130,6 +130,8 @@ pub struct Capability { pub delete: bool, /// If typed_kv operator supports scan natively. pub scan: bool, + /// If typed_kv operator supports shared access. + pub shared: bool, } impl Debug for Capability { @@ -148,6 +150,9 @@ impl Debug for Capability { if self.scan { s.push("Scan"); } + if self.shared { + s.push("Shared"); + } write!(f, "{{ {} }}", s.join(" | ")) } diff --git a/core/src/raw/adapters/typed_kv/backend.rs index a500e2c771f0..1005811c9443 100644 --- a/core/src/raw/adapters/typed_kv/backend.rs +++ b/core/src/raw/adapters/typed_kv/backend.rs @@ -89,6 +89,10 @@ impl Access for Backend { cap.list_with_recursive = true; } + if kv_cap.shared { + cap.shared = true; + } + cap.blocking = true; am.set_native_capability(cap); diff --git a/core/src/raw/futures_util.rs index 3abf645b1c5c..3d182bf49ab9 100644 --- a/core/src/raw/futures_util.rs +++ b/core/src/raw/futures_util.rs @@ -49,17 +49,17 @@ pub type BoxedStaticFuture = futures::future::LocalBoxFuture<'static, T>; /// /// # Safety /// -/// MaybeSend equivalent to `Send` on non-wasm32 target. And it's empty -/// on wasm32 target. +/// [`MaybeSend`] is equivalent to `Send` on non-wasm32 targets. +/// And it's an empty trait on wasm32 targets, indicating that a type is not required to be `Send`. #[cfg(not(target_arch = "wasm32"))] -pub unsafe trait MaybeSend: Send {} +pub trait MaybeSend: Send {} #[cfg(target_arch = "wasm32")] -pub unsafe trait MaybeSend {} +pub trait MaybeSend {} #[cfg(not(target_arch = "wasm32"))] -unsafe impl MaybeSend for T {} +impl MaybeSend for T {} #[cfg(target_arch = "wasm32")] -unsafe impl MaybeSend for T {} +impl MaybeSend for T {} /// ConcurrentTasks is used to execute tasks concurrently.
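Since `MaybeSend` above is now a safe marker trait, downstream code keeps a single bound that degrades gracefully on wasm32. A minimal sketch of the calling pattern, assuming `MaybeSend` stays exported from `opendal::raw` (the helper name here is invented for illustration):

```rust
use opendal::raw::MaybeSend;

// One bound for every target: on native targets `MaybeSend` requires
// `Send`; on wasm32 it is satisfied by any type.
fn assert_spawnable<F>(fut: F) -> F
where
    F: std::future::Future + MaybeSend + 'static,
{
    fut
}
```

Dropping `unsafe` is sound here because the trait carries no invariant of its own; it only forwards (or, on wasm32, waives) the `Send` requirement.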
/// diff --git a/core/src/raw/oio/write/append_write.rs b/core/src/raw/oio/write/append_write.rs index 06c72cc5e2cc..9ff3d06b7c1a 100644 --- a/core/src/raw/oio/write/append_write.rs +++ b/core/src/raw/oio/write/append_write.rs @@ -65,7 +65,6 @@ pub struct AppendWriter { /// # Safety /// /// wasm32 is a special target that we only have one event-loop for this state. - impl AppendWriter { /// Create a new AppendWriter. pub fn new(inner: W) -> Self { diff --git a/core/src/raw/oio/write/mod.rs b/core/src/raw/oio/write/mod.rs index bd1ae5ae958b..9ce7d9427bbd 100644 --- a/core/src/raw/oio/write/mod.rs +++ b/core/src/raw/oio/write/mod.rs @@ -34,10 +34,6 @@ mod one_shot_write; pub use one_shot_write::OneShotWrite; pub use one_shot_write::OneShotWriter; -mod range_write; -pub use range_write::RangeWrite; -pub use range_write::RangeWriter; - mod block_write; pub use block_write::BlockWrite; pub use block_write::BlockWriter; diff --git a/core/src/raw/oio/write/multipart_write.rs b/core/src/raw/oio/write/multipart_write.rs index 2ce71ef4fe78..b692303c5b60 100644 --- a/core/src/raw/oio/write/multipart_write.rs +++ b/core/src/raw/oio/write/multipart_write.rs @@ -141,7 +141,6 @@ pub struct MultipartWriter { /// # Safety /// /// wasm32 is a special target that we only have one event-loop for this state. - impl MultipartWriter { /// Create a new MultipartWriter. pub fn new(inner: W, executor: Option, concurrent: usize) -> Self { diff --git a/core/src/raw/oio/write/range_write.rs b/core/src/raw/oio/write/range_write.rs deleted file mode 100644 index 905d850bae3f..000000000000 --- a/core/src/raw/oio/write/range_write.rs +++ /dev/null @@ -1,365 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use futures::select; -use futures::Future; -use futures::FutureExt; - -use crate::raw::*; -use crate::*; - -/// RangeWrite is used to implement [`oio::Write`] based on range write. -/// -/// # Services -/// -/// Services like gcs support range write via [GCS Resumable Upload](https://cloud.google.com/storage/docs/resumable-uploads). -/// -/// GCS will support upload content by specifying the range of the file in `CONTENT-RANGE`. -/// -/// Most range based services will have the following limitations: -/// -/// - The size of chunk per upload must be aligned to a certain size. For example, GCS requires -/// to align with 256KiB. -/// - Some services requires to complete the write at the last chunk with the total size. 
-/// -/// # Architecture -/// -/// The architecture after adopting [`RangeWrite`]: -/// -/// - Services impl `RangeWrite` -/// - `RangeWriter` impl `Write` -/// - Expose `RangeWriter` as `Accessor::Writer` -/// -/// # Requirements -/// -/// Services that implement `RangeWrite` must fulfill the following requirements: -/// -/// - Must be a http service that could accept `AsyncBody`. -/// - Need initialization before writing. -/// - Writing data based on range: `offset`, `size`. -pub trait RangeWrite: Send + Sync + Unpin + 'static { - /// write_once is used to write the data to underlying storage at once. - /// - /// RangeWriter will call this API when: - /// - /// - All the data has been written to the buffer and we can perform the upload at once. - fn write_once(&self, body: Buffer) -> impl Future> + MaybeSend; - - /// Initiate range the range write, the returning value is the location. - fn initiate_range(&self) -> impl Future> + MaybeSend; - - /// write_range will write a range of data. - fn write_range( - &self, - location: &str, - offset: u64, - body: Buffer, - ) -> impl Future> + MaybeSend; - - /// complete_range will complete the range write by uploading the last chunk. - fn complete_range( - &self, - location: &str, - offset: u64, - body: Buffer, - ) -> impl Future> + MaybeSend; - - /// abort_range will abort the range write by abort all already uploaded data. - fn abort_range(&self, location: &str) -> impl Future> + MaybeSend; -} - -struct WriteInput { - w: Arc, - executor: Executor, - - location: Arc, - offset: u64, - bytes: Buffer, -} - -/// RangeWriter will implements [`oio::Write`] based on range write. -pub struct RangeWriter { - w: Arc, - executor: Executor, - - location: Option>, - next_offset: u64, - cache: Option, - tasks: ConcurrentTasks, ()>, -} - -impl RangeWriter { - /// Create a new MultipartWriter. - pub fn new(inner: W, executor: Option, concurrent: usize) -> Self { - let executor = executor.unwrap_or_default(); - - Self { - w: Arc::new(inner), - executor: executor.clone(), - location: None, - next_offset: 0, - cache: None, - - tasks: ConcurrentTasks::new(executor, concurrent, |input| { - Box::pin(async move { - let fut = - input - .w - .write_range(&input.location, input.offset, input.bytes.clone()); - match input.executor.timeout() { - None => { - let result = fut.await; - (input, result) - } - Some(timeout) => { - let result = select! { - result = fut.fuse() => { - result - } - _ = timeout.fuse() => { - Err(Error::new( - ErrorKind::Unexpected, "write range timeout") - .with_context("offset", input.offset.to_string()) - .set_temporary()) - } - }; - (input, result) - } - } - }) - }), - } - } - - fn fill_cache(&mut self, bs: Buffer) -> usize { - let size = bs.len(); - assert!(self.cache.is_none()); - self.cache = Some(bs); - size - } -} - -impl oio::Write for RangeWriter { - async fn write(&mut self, bs: Buffer) -> Result<()> { - let location = match self.location.clone() { - Some(location) => location, - None => { - // Fill cache with the first write. 
- if self.cache.is_none() { - self.fill_cache(bs); - return Ok(()); - } - - let location = self.w.initiate_range().await?; - let location = Arc::new(location); - self.location = Some(location.clone()); - location - } - }; - - let bytes = self.cache.clone().expect("pending write must exist"); - let length = bytes.len() as u64; - let offset = self.next_offset; - - self.tasks - .execute(WriteInput { - w: self.w.clone(), - executor: self.executor.clone(), - location, - offset, - bytes, - }) - .await?; - self.cache = None; - self.next_offset += length; - self.fill_cache(bs); - Ok(()) - } - - async fn close(&mut self) -> Result<()> { - let Some(location) = self.location.clone() else { - let body = self.cache.clone().unwrap_or_default(); - // Call write_once if there is no data in buffer and no location. - self.w.write_once(body).await?; - self.cache = None; - return Ok(()); - }; - - // Make sure all tasks are finished. - while self.tasks.next().await.transpose()?.is_some() {} - - if let Some(buffer) = self.cache.clone() { - let offset = self.next_offset; - self.w.complete_range(&location, offset, buffer).await?; - self.cache = None; - } - - Ok(()) - } - - async fn abort(&mut self) -> Result<()> { - let Some(location) = self.location.clone() else { - return Ok(()); - }; - - self.tasks.clear(); - self.cache = None; - self.w.abort_range(&location).await?; - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use std::collections::HashSet; - use std::sync::Mutex; - use std::time::Duration; - - use pretty_assertions::assert_eq; - use rand::thread_rng; - use rand::Rng; - use rand::RngCore; - use tokio::time::sleep; - - use super::*; - use crate::raw::oio::Write; - - struct TestWrite { - length: u64, - bytes: HashSet, - } - - impl TestWrite { - pub fn new() -> Arc> { - let v = Self { - bytes: HashSet::new(), - length: 0, - }; - - Arc::new(Mutex::new(v)) - } - } - - impl RangeWrite for Arc> { - async fn write_once(&self, body: Buffer) -> Result<()> { - let mut test = self.lock().unwrap(); - let size = body.len() as u64; - test.length += size; - test.bytes.extend(0..size); - - Ok(()) - } - - async fn initiate_range(&self) -> Result { - Ok("test".to_string()) - } - - async fn write_range(&self, _: &str, offset: u64, body: Buffer) -> Result<()> { - // Add an async sleep here to enforce some pending. - sleep(Duration::from_millis(50)).await; - - // We will have 10% percent rate for write part to fail. - if thread_rng().gen_bool(1.0 / 10.0) { - return Err( - Error::new(ErrorKind::Unexpected, "I'm a crazy monkey!").set_temporary() - ); - } - - let mut test = self.lock().unwrap(); - let size = body.len() as u64; - test.length += size; - - let input = (offset..offset + size).collect::>(); - - assert!( - test.bytes.is_disjoint(&input), - "input should not have overlap" - ); - test.bytes.extend(input); - - Ok(()) - } - - async fn complete_range(&self, _: &str, offset: u64, body: Buffer) -> Result<()> { - // Add an async sleep here to enforce some pending. - sleep(Duration::from_millis(50)).await; - - // We will have 10% percent rate for write part to fail. 
- if thread_rng().gen_bool(1.0 / 10.0) { - return Err( - Error::new(ErrorKind::Unexpected, "I'm a crazy monkey!").set_temporary() - ); - } - - let mut test = self.lock().unwrap(); - let size = body.len() as u64; - test.length += size; - - let input = (offset..offset + size).collect::>(); - assert!( - test.bytes.is_disjoint(&input), - "input should not have overlap" - ); - test.bytes.extend(input); - - Ok(()) - } - - async fn abort_range(&self, _: &str) -> Result<()> { - Ok(()) - } - } - - #[tokio::test] - async fn test_range_writer_with_concurrent_errors() { - let mut rng = thread_rng(); - - let mut w = RangeWriter::new(TestWrite::new(), Some(Executor::new()), 200); - let mut total_size = 0u64; - - for _ in 0..1000 { - let size = rng.gen_range(1..1024); - total_size += size as u64; - - let mut bs = vec![0; size]; - rng.fill_bytes(&mut bs); - - loop { - match w.write(bs.clone().into()).await { - Ok(_) => break, - Err(_) => continue, - } - } - } - - loop { - match w.close().await { - Ok(_) => break, - Err(_) => continue, - } - } - - let actual_bytes = w.w.lock().unwrap().bytes.clone(); - let expected_bytes: HashSet<_> = (0..total_size).collect(); - assert_eq!(actual_bytes, expected_bytes); - - let actual_size = w.w.lock().unwrap().length; - assert_eq!(actual_size, total_size); - } -} diff --git a/core/src/raw/ops.rs b/core/src/raw/ops.rs index 3f7fe73478f8..7cdac88a588e 100644 --- a/core/src/raw/ops.rs +++ b/core/src/raw/ops.rs @@ -543,6 +543,7 @@ pub struct OpWrite { content_disposition: Option, cache_control: Option, executor: Option, + if_match: Option, if_none_match: Option, if_not_exists: bool, user_metadata: Option>, @@ -630,6 +631,17 @@ impl OpWrite { self } + /// Set the If-Match of the option + pub fn with_if_match(mut self, s: &str) -> Self { + self.if_match = Some(s.to_string()); + self + } + + /// Get If-Match from option + pub fn if_match(&self) -> Option<&str> { + self.if_match.as_deref() + } + /// Set the If-None-Match of the option pub fn with_if_none_match(mut self, s: &str) -> Self { self.if_none_match = Some(s.to_string()); diff --git a/core/src/raw/serde_util.rs b/core/src/raw/serde_util.rs index f34833a47d1c..f64bebab6a0f 100644 --- a/core/src/raw/serde_util.rs +++ b/core/src/raw/serde_util.rs @@ -95,7 +95,7 @@ impl Iterator for Pairs { /// Pair is used to hold both key and value of a config for better error output. 
struct Pair(String, String); -impl<'de> IntoDeserializer<'de, de::value::Error> for Pair { +impl IntoDeserializer<'_, de::value::Error> for Pair { type Deserializer = Self; fn into_deserializer(self) -> Self::Deserializer { diff --git a/core/src/services/aliyun_drive/backend.rs b/core/src/services/aliyun_drive/backend.rs index c20dee17b6dc..5a5669c0d55a 100644 --- a/core/src/services/aliyun_drive/backend.rs +++ b/core/src/services/aliyun_drive/backend.rs @@ -233,7 +233,7 @@ impl Access for AliyunDriveBackend { rename: true, list: true, list_with_limit: true, - + shared: true, ..Default::default() }); am.into() diff --git a/core/src/services/alluxio/backend.rs b/core/src/services/alluxio/backend.rs index 5a468c1a0cea..e3ff2acb170c 100644 --- a/core/src/services/alluxio/backend.rs +++ b/core/src/services/alluxio/backend.rs @@ -172,6 +172,8 @@ impl Access for AlluxioBackend { list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/atomicserver/backend.rs b/core/src/services/atomicserver/backend.rs index 546e6d25989f..5683cfea06be 100644 --- a/core/src/services/atomicserver/backend.rs +++ b/core/src/services/atomicserver/backend.rs @@ -361,6 +361,7 @@ impl kv::Adapter for Adapter { read: true, write: true, delete: true, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/azblob/backend.rs b/core/src/services/azblob/backend.rs index a8baf13ba495..20ac5ef3e188 100644 --- a/core/src/services/azblob/backend.rs +++ b/core/src/services/azblob/backend.rs @@ -513,6 +513,8 @@ impl Access for AzblobBackend { write_can_multi: true, write_with_cache_control: true, write_with_content_type: true, + write_with_if_not_exists: true, + write_with_if_none_match: true, write_with_user_metadata: true, delete: true, @@ -528,6 +530,8 @@ impl Access for AzblobBackend { presign_read: self.has_sas_token, presign_write: self.has_sas_token, + shared: true, + ..Default::default() }); diff --git a/core/src/services/azblob/core.rs b/core/src/services/azblob/core.rs index 6a208497f76c..1202ae8ba4bf 100644 --- a/core/src/services/azblob/core.rs +++ b/core/src/services/azblob/core.rs @@ -244,18 +244,10 @@ impl AzblobCore { let mut req = Request::put(&url); - if let Some(user_metadata) = args.user_metadata() { - for (key, value) in user_metadata { - req = req.header(format!("{X_MS_META_PREFIX}{key}"), value) - } - } - - // Set SSE headers. - req = self.insert_sse_headers(req); - - if let Some(cache_control) = args.cache_control() { - req = req.header(constants::X_MS_BLOB_CACHE_CONTROL, cache_control); - } + req = req.header( + HeaderName::from_static(constants::X_MS_BLOB_TYPE), + "BlockBlob", + ); if let Some(size) = size { req = req.header(CONTENT_LENGTH, size) @@ -265,10 +257,28 @@ impl AzblobCore { req = req.header(CONTENT_TYPE, ty) } - req = req.header( - HeaderName::from_static(constants::X_MS_BLOB_TYPE), - "BlockBlob", - ); + // Specify the wildcard character (*) to perform the operation only if + // the resource does not exist, and fail the operation if it does exist. + if args.if_not_exists() { + req = req.header(IF_NONE_MATCH, "*"); + } + + if let Some(v) = args.if_none_match() { + req = req.header(IF_NONE_MATCH, v); + } + + if let Some(cache_control) = args.cache_control() { + req = req.header(constants::X_MS_BLOB_CACHE_CONTROL, cache_control); + } + + // Set SSE headers. 
+ req = self.insert_sse_headers(req); + + if let Some(user_metadata) = args.user_metadata() { + for (key, value) in user_metadata { + req = req.header(format!("{X_MS_META_PREFIX}{key}"), value) + } + } // Set body let req = req.body(body).map_err(new_request_build_error)?; diff --git a/core/src/services/azblob/error.rs b/core/src/services/azblob/error.rs index a8d596952911..1ea38ad8755e 100644 --- a/core/src/services/azblob/error.rs +++ b/core/src/services/azblob/error.rs @@ -66,7 +66,7 @@ pub(super) fn parse_error(resp: Response) -> Error { let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), StatusCode::FORBIDDEN => (ErrorKind::PermissionDenied, false), - StatusCode::PRECONDITION_FAILED | StatusCode::NOT_MODIFIED => { + StatusCode::PRECONDITION_FAILED | StatusCode::NOT_MODIFIED | StatusCode::CONFLICT => { (ErrorKind::ConditionNotMatch, false) } StatusCode::INTERNAL_SERVER_ERROR diff --git a/core/src/services/azdls/backend.rs b/core/src/services/azdls/backend.rs index 01e5149ec843..b2a6b493d318 100644 --- a/core/src/services/azdls/backend.rs +++ b/core/src/services/azdls/backend.rs @@ -236,12 +236,17 @@ impl Access for AzdlsBackend { write: true, write_can_append: true, + write_with_if_none_match: true, + write_with_if_not_exists: true, + create_dir: true, delete: true, rename: true, list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/azdls/core.rs b/core/src/services/azdls/core.rs index 714a5adc7bed..90d9f5533813 100644 --- a/core/src/services/azdls/core.rs +++ b/core/src/services/azdls/core.rs @@ -20,9 +20,9 @@ use std::fmt::Debug; use std::fmt::Formatter; use std::fmt::Write; -use http::header::CONTENT_DISPOSITION; use http::header::CONTENT_LENGTH; use http::header::CONTENT_TYPE; +use http::header::{CONTENT_DISPOSITION, IF_NONE_MATCH}; use http::HeaderName; use http::HeaderValue; use http::Request; @@ -153,6 +153,14 @@ impl AzdlsCore { req = req.header(CONTENT_DISPOSITION, pos) } + if args.if_not_exists() { + req = req.header(IF_NONE_MATCH, "*") + } + + if let Some(v) = args.if_none_match() { + req = req.header(IF_NONE_MATCH, v) + } + // Set body let req = req.body(body).map_err(new_request_build_error)?; diff --git a/core/src/services/azdls/error.rs b/core/src/services/azdls/error.rs index de50c5638a4c..95e09bf88985 100644 --- a/core/src/services/azdls/error.rs +++ b/core/src/services/azdls/error.rs @@ -66,7 +66,9 @@ pub(super) fn parse_error(resp: Response) -> Error { let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), StatusCode::FORBIDDEN => (ErrorKind::PermissionDenied, false), - StatusCode::PRECONDITION_FAILED => (ErrorKind::ConditionNotMatch, false), + StatusCode::PRECONDITION_FAILED | StatusCode::CONFLICT => { + (ErrorKind::ConditionNotMatch, false) + } StatusCode::INTERNAL_SERVER_ERROR | StatusCode::BAD_GATEWAY | StatusCode::SERVICE_UNAVAILABLE diff --git a/core/src/services/azfile/backend.rs b/core/src/services/azfile/backend.rs index 5f8706c53dad..46ebb283bd2f 100644 --- a/core/src/services/azfile/backend.rs +++ b/core/src/services/azfile/backend.rs @@ -256,6 +256,8 @@ impl Access for AzfileBackend { list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/b2/backend.rs b/core/src/services/b2/backend.rs index 08a94ae9ea48..be3f61435bd2 100644 --- a/core/src/services/b2/backend.rs +++ b/core/src/services/b2/backend.rs @@ -262,6 +262,8 @@ impl Access for B2Backend { presign_write: true, presign_stat: true, + shared: true, 
+ ..Default::default() }); diff --git a/core/src/services/cacache/backend.rs index 1ef193a68252..71d4c3f839f1 100644 --- a/core/src/services/cacache/backend.rs +++ b/core/src/services/cacache/backend.rs @@ -96,6 +96,7 @@ impl kv::Adapter for Adapter { write: true, delete: true, blocking: true, + shared: false, ..Default::default() }, ) diff --git a/core/src/services/chainsafe/backend.rs index d08a814288b9..80a403a89c9d 100644 --- a/core/src/services/chainsafe/backend.rs +++ b/core/src/services/chainsafe/backend.rs @@ -190,6 +190,8 @@ impl Access for ChainsafeBackend { list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/cloudflare_kv/backend.rs index c2be1ddd1663..5d4a65a4b31a 100644 --- a/core/src/services/cloudflare_kv/backend.rs +++ b/core/src/services/cloudflare_kv/backend.rs @@ -191,6 +191,7 @@ impl kv::Adapter for Adapter { read: true, write: true, list: true, + shared: true, ..Default::default() }, diff --git a/core/src/services/compfs/backend.rs index 6d65ce7292be..ba323b50f84f 100644 --- a/core/src/services/compfs/backend.rs +++ b/core/src/services/compfs/backend.rs @@ -134,6 +134,8 @@ impl Access for CompfsBackend { copy: true, rename: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/cos/backend.rs index 2db8ab337108..e7efbc078e17 100644 --- a/core/src/services/cos/backend.rs +++ b/core/src/services/cos/backend.rs @@ -261,6 +261,8 @@ impl Access for CosBackend { write_with_content_type: true, write_with_cache_control: true, write_with_content_disposition: true, + // TODO: set this to false once versioning has been enabled. + write_with_if_not_exists: true, // The min multipart size of COS is 1 MiB. // // ref: @@ -285,6 +287,8 @@ impl Access for CosBackend { presign_read: true, presign_write: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/cos/core.rs index f7dbdef60942..4d28b268895b 100644 --- a/core/src/services/cos/core.rs +++ b/core/src/services/cos/core.rs @@ -172,6 +172,18 @@ impl CosCore { req = req.header(CONTENT_TYPE, mime) } + // For a bucket which has never enabled versioning, you may use this header to + // specify whether to prohibit overwriting the object with the same name + // when uploading the object: + // + // When the x-cos-forbid-overwrite is specified as true, overwriting the object + // with the same name will be prohibited.
+ // + // ref: https://www.tencentcloud.com/document/product/436/7749 + if args.if_not_exists() { + req = req.header("x-cos-forbid-overwrite", "true") + } + let req = req.body(body).map_err(new_request_build_error)?; Ok(req) diff --git a/core/src/services/cos/error.rs b/core/src/services/cos/error.rs index 65639e720591..df5f95df5cca 100644 --- a/core/src/services/cos/error.rs +++ b/core/src/services/cos/error.rs @@ -43,7 +43,7 @@ pub(super) fn parse_error(resp: Response) -> Error { let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), StatusCode::FORBIDDEN => (ErrorKind::PermissionDenied, false), - StatusCode::PRECONDITION_FAILED | StatusCode::NOT_MODIFIED => { + StatusCode::PRECONDITION_FAILED | StatusCode::NOT_MODIFIED | StatusCode::CONFLICT => { (ErrorKind::ConditionNotMatch, false) } StatusCode::INTERNAL_SERVER_ERROR diff --git a/core/src/services/d1/backend.rs b/core/src/services/d1/backend.rs index 643617431e47..a86d47ab8862 100644 --- a/core/src/services/d1/backend.rs +++ b/core/src/services/d1/backend.rs @@ -270,6 +270,7 @@ impl kv::Adapter for Adapter { // Cloudflare D1 supports 1MB as max in write_total. // refer to https://developers.cloudflare.com/d1/platform/limits/ write_total_max_size: Some(1000 * 1000), + shared: true, ..Default::default() }, ) diff --git a/core/src/services/dashmap/backend.rs b/core/src/services/dashmap/backend.rs index 1f4ea285ee05..09889b4a51d3 100644 --- a/core/src/services/dashmap/backend.rs +++ b/core/src/services/dashmap/backend.rs @@ -94,6 +94,7 @@ impl typed_kv::Adapter for Adapter { set: true, scan: true, delete: true, + shared: false, }, ) } diff --git a/core/src/services/dbfs/backend.rs b/core/src/services/dbfs/backend.rs index 97d1cc7d6581..93c2ec36f24c 100644 --- a/core/src/services/dbfs/backend.rs +++ b/core/src/services/dbfs/backend.rs @@ -166,6 +166,8 @@ impl Access for DbfsBackend { list: true, + shared: true, + ..Default::default() }); am.into() diff --git a/core/src/services/dropbox/backend.rs b/core/src/services/dropbox/backend.rs index 83f4007554da..c7cd15572231 100644 --- a/core/src/services/dropbox/backend.rs +++ b/core/src/services/dropbox/backend.rs @@ -67,6 +67,8 @@ impl Access for DropboxBackend { rename: true, + shared: true, + ..Default::default() }); ma.into() diff --git a/core/src/services/etcd/backend.rs b/core/src/services/etcd/backend.rs index 4a6e94f13bd9..8365908090bf 100644 --- a/core/src/services/etcd/backend.rs +++ b/core/src/services/etcd/backend.rs @@ -282,6 +282,7 @@ impl kv::Adapter for Adapter { read: true, write: true, list: true, + shared: true, ..Default::default() }, diff --git a/core/src/services/foundationdb/backend.rs b/core/src/services/foundationdb/backend.rs index b9c70946b5f4..c72099cb1e3f 100644 --- a/core/src/services/foundationdb/backend.rs +++ b/core/src/services/foundationdb/backend.rs @@ -120,6 +120,7 @@ impl kv::Adapter for Adapter { read: true, write: true, delete: true, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/fs/backend.rs b/core/src/services/fs/backend.rs index 3aed4925aad9..d2ca09608b1b 100644 --- a/core/src/services/fs/backend.rs +++ b/core/src/services/fs/backend.rs @@ -194,6 +194,8 @@ impl Access for FsBackend { rename: true, blocking: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/ftp/backend.rs b/core/src/services/ftp/backend.rs index fe4a22798136..460cdc815770 100644 --- a/core/src/services/ftp/backend.rs +++ b/core/src/services/ftp/backend.rs @@ -284,6 +284,8 @@ impl Access 
for FtpBackend { list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index 63f311ee34d2..2b5aac54dcf1 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -369,6 +369,8 @@ impl Access for GcsBackend { write_can_multi: true, write_with_content_type: true, write_with_user_metadata: true, + write_with_if_not_exists: true, + // The min multipart size of Gcs is 5 MiB. // // ref: @@ -397,6 +399,8 @@ impl Access for GcsBackend { presign_read: true, presign_write: true, + shared: true, + ..Default::default() }); am.into() diff --git a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index 86d73bc2e528..fe6a242f204c 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -270,6 +270,13 @@ impl GcsCore { write!(&mut url, "&predefinedAcl={}", acl).unwrap(); } + // Makes the operation conditional on whether the object's current generation + // matches the given value. Setting to 0 makes the operation succeed only if + // there are no live versions of the object. + if op.if_not_exists() { + write!(&mut url, "&ifGenerationMatch=0").unwrap(); + } + let mut req = Request::post(&url); req = req.header(CONTENT_LENGTH, size.unwrap_or_default()); diff --git a/core/src/services/gdrive/backend.rs b/core/src/services/gdrive/backend.rs index 5f31ab56abd0..0fc6f7e24e60 100644 --- a/core/src/services/gdrive/backend.rs +++ b/core/src/services/gdrive/backend.rs @@ -56,10 +56,14 @@ impl Access for GdriveBackend { .set_root(&self.core.root) .set_native_capability(Capability { stat: true, + stat_has_content_length: true, + stat_has_content_type: true, + stat_has_last_modified: true, read: true, list: true, + list_has_content_type: true, write: true, @@ -67,6 +71,9 @@ impl Access for GdriveBackend { delete: true, rename: true, copy: true, + + shared: true, + ..Default::default() }); @@ -91,11 +98,12 @@ impl Access for GdriveBackend { let gdrive_file: GdriveFile = serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; - if gdrive_file.mime_type == "application/vnd.google-apps.folder" { - return Ok(RpStat::new(Metadata::new(EntryMode::DIR))); + let file_type = if gdrive_file.mime_type == "application/vnd.google-apps.folder" { + EntryMode::DIR + } else { + EntryMode::FILE }; - - let mut meta = Metadata::new(EntryMode::FILE); + let mut meta = Metadata::new(file_type).with_content_type(gdrive_file.mime_type); if let Some(v) = gdrive_file.size { meta = meta.with_content_length(v.parse::().map_err(|e| { Error::new(ErrorKind::Unexpected, "parse content length").set_source(e) diff --git a/core/src/services/gdrive/lister.rs b/core/src/services/gdrive/lister.rs index 2dfec0e739c7..7719f986f35f 100644 --- a/core/src/services/gdrive/lister.rs +++ b/core/src/services/gdrive/lister.rs @@ -58,13 +58,13 @@ impl oio::PageList for GdriveLister { _ => return Err(parse_error(resp)), }; - // Gdrive returns empty content when this dir is not exist. + // Google Drive returns an empty response when attempting to list a non-existent directory. if bytes.is_empty() { ctx.done = true; return Ok(()); } - // Return self at the first page. + // Include the current directory itself when handling the first page of the listing. 
if ctx.token.is_empty() && !ctx.done { let path = build_rel_path(&self.core.root, &self.path); let e = oio::Entry::new(&path, Metadata::new(EntryMode::DIR)); @@ -94,8 +94,12 @@ impl oio::PageList for GdriveLister { let path = format!("{}{}", &self.path, file.name); let normalized_path = build_rel_path(root, &path); - // Update path cache with list result. - self.core.path_cache.insert(&path, &file.id).await; + // Update path cache when path doesn't exist. + // When Google Drive converts a file format, for example, Microsoft PowerPoint, + // it keeps two entries with the same ID. + if let Ok(None) = self.core.path_cache.get(&path).await { + self.core.path_cache.insert(&path, &file.id).await; + } let entry = oio::Entry::new(&normalized_path, Metadata::new(file_type)); ctx.entries.push_back(entry); diff --git a/core/src/services/ghac/backend.rs index 92242cef4347..adc1ee6c8adc 100644 --- a/core/src/services/ghac/backend.rs +++ b/core/src/services/ghac/backend.rs @@ -251,6 +251,8 @@ impl Access for GhacBackend { write_can_multi: true, delete: true, + shared: true, + ..Default::default() }); am.into() diff --git a/core/src/services/github/backend.rs index 83b0298f9ca4..11b085987d48 100644 --- a/core/src/services/github/backend.rs +++ b/core/src/services/github/backend.rs @@ -197,6 +197,8 @@ impl Access for GithubBackend { list: true, list_with_recursive: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/gridfs/backend.rs index f2bb2341534c..ad31fcc3ce33 100644 --- a/core/src/services/gridfs/backend.rs +++ b/core/src/services/gridfs/backend.rs @@ -221,6 +221,7 @@ impl kv::Adapter for Adapter { Capability { read: true, write: true, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/hdfs/backend.rs index 0c2d7627b8ae..da5039437dda 100644 --- a/core/src/services/hdfs/backend.rs +++ b/core/src/services/hdfs/backend.rs @@ -233,6 +233,8 @@ impl Access for HdfsBackend { rename: true, blocking: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/hdfs_native/backend.rs index 79685ffdcbb8..3884a5f16303 100644 --- a/core/src/services/hdfs_native/backend.rs +++ b/core/src/services/hdfs_native/backend.rs @@ -33,7 +33,6 @@ use crate::*; /// [Hadoop Distributed File System (HDFS™)](https://hadoop.apache.org/) support. /// Using [Native Rust HDFS client](https://github.com/Kimahriman/hdfs-native).
- impl Configurator for HdfsNativeConfig { type Builder = HdfsNativeBuilder; fn into_builder(self) -> Self::Builder { @@ -163,6 +162,8 @@ impl Access for HdfsNativeBackend { delete: true, rename: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/http/backend.rs b/core/src/services/http/backend.rs index 6d5211a822bc..4fd894d9946e 100644 --- a/core/src/services/http/backend.rs +++ b/core/src/services/http/backend.rs @@ -220,6 +220,8 @@ impl Access for HttpBackend { presign_read: !self.has_authorization(), presign_stat: !self.has_authorization(), + shared: true, + ..Default::default() }); diff --git a/core/src/services/huggingface/backend.rs b/core/src/services/huggingface/backend.rs index 1e7e5f06b5c1..7be0ce152027 100644 --- a/core/src/services/huggingface/backend.rs +++ b/core/src/services/huggingface/backend.rs @@ -211,6 +211,8 @@ impl Access for HuggingfaceBackend { list: true, list_with_recursive: true, + shared: true, + ..Default::default() }); am.into() diff --git a/core/src/services/icloud/backend.rs b/core/src/services/icloud/backend.rs index 389f268d7f61..1ba0f2c3662f 100644 --- a/core/src/services/icloud/backend.rs +++ b/core/src/services/icloud/backend.rs @@ -241,6 +241,7 @@ impl Access for IcloudBackend { .set_native_capability(Capability { stat: true, read: true, + shared: true, ..Default::default() }); ma.into() diff --git a/core/src/services/ipfs/backend.rs b/core/src/services/ipfs/backend.rs index bc139a89ed6d..026549853386 100644 --- a/core/src/services/ipfs/backend.rs +++ b/core/src/services/ipfs/backend.rs @@ -181,6 +181,8 @@ impl Access for IpfsBackend { list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/ipmfs/backend.rs b/core/src/services/ipmfs/backend.rs index a1e83560dc75..1f1e63584f80 100644 --- a/core/src/services/ipmfs/backend.rs +++ b/core/src/services/ipmfs/backend.rs @@ -85,6 +85,8 @@ impl Access for IpmfsBackend { list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/koofr/backend.rs b/core/src/services/koofr/backend.rs index 852663dfd420..e5af50e351af 100644 --- a/core/src/services/koofr/backend.rs +++ b/core/src/services/koofr/backend.rs @@ -227,6 +227,8 @@ impl Access for KoofrBackend { list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/lakefs/backend.rs b/core/src/services/lakefs/backend.rs index 74ca1a25d38e..084ca58343a6 100644 --- a/core/src/services/lakefs/backend.rs +++ b/core/src/services/lakefs/backend.rs @@ -214,6 +214,7 @@ impl Access for LakefsBackend { write: true, delete: true, copy: true, + shared: true, ..Default::default() }); am.into() diff --git a/core/src/services/libsql/backend.rs b/core/src/services/libsql/backend.rs index 233580c78e9f..1867d3bbbfaf 100644 --- a/core/src/services/libsql/backend.rs +++ b/core/src/services/libsql/backend.rs @@ -315,6 +315,7 @@ impl kv::Adapter for Adapter { read: true, write: true, delete: true, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/memcached/backend.rs b/core/src/services/memcached/backend.rs index 2b772f713e5e..5ddfa4b9c114 100644 --- a/core/src/services/memcached/backend.rs +++ b/core/src/services/memcached/backend.rs @@ -206,6 +206,7 @@ impl kv::Adapter for Adapter { Capability { read: true, write: true, + shared: true, ..Default::default() }, diff --git a/core/src/services/memory/backend.rs b/core/src/services/memory/backend.rs index c300114659e0..cce6c7840f7c 100644 --- a/core/src/services/memory/backend.rs +++ 
b/core/src/services/memory/backend.rs @@ -84,6 +84,7 @@ impl typed_kv::Adapter for Adapter { set: true, delete: true, scan: true, + shared: false, }, ) } diff --git a/core/src/services/mini_moka/backend.rs index 1fed62b31d95..23587e1e307e 100644 --- a/core/src/services/mini_moka/backend.rs +++ b/core/src/services/mini_moka/backend.rs @@ -143,6 +143,7 @@ impl typed_kv::Adapter for Adapter { set: true, delete: true, scan: true, + shared: false, }, ) } diff --git a/core/src/services/moka/backend.rs index 4a2e34e5f903..608bc902b56d 100644 --- a/core/src/services/moka/backend.rs +++ b/core/src/services/moka/backend.rs @@ -166,6 +166,7 @@ impl typed_kv::Adapter for Adapter { set: true, delete: true, scan: true, + shared: false, }, ) } diff --git a/core/src/services/mongodb/backend.rs index ed5f26a411e9..a3edd88dfcb7 100644 --- a/core/src/services/mongodb/backend.rs +++ b/core/src/services/mongodb/backend.rs @@ -235,6 +235,7 @@ impl kv::Adapter for Adapter { Capability { read: true, write: true, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/monoiofs/backend.rs index e3eee02ca810..863fbe668087 100644 --- a/core/src/services/monoiofs/backend.rs +++ b/core/src/services/monoiofs/backend.rs @@ -124,6 +124,7 @@ impl Access for MonoiofsBackend { rename: true, create_dir: true, copy: true, + shared: true, ..Default::default() }); am.into() diff --git a/core/src/services/mysql/backend.rs index ada294d26691..ad4ad4374b55 100644 --- a/core/src/services/mysql/backend.rs +++ b/core/src/services/mysql/backend.rs @@ -198,6 +198,7 @@ impl kv::Adapter for Adapter { read: true, write: true, delete: true, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/nebula_graph/backend.rs index 4d70e3210a73..37275cf4177d 100644 --- a/core/src/services/nebula_graph/backend.rs +++ b/core/src/services/nebula_graph/backend.rs @@ -283,6 +283,7 @@ impl kv::Adapter for Adapter { write_can_empty: true, delete: true, list: true, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/obs/backend.rs index 25317fe0e4c7..36f852da8ba6 100644 --- a/core/src/services/obs/backend.rs +++ b/core/src/services/obs/backend.rs @@ -294,6 +294,8 @@ impl Access for ObsBackend { presign_read: true, presign_write: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/onedrive/backend.rs index 25269186e688..35af133a8e5c 100644 --- a/core/src/services/onedrive/backend.rs +++ b/core/src/services/onedrive/backend.rs @@ -83,6 +83,7 @@ impl Access for OnedriveBackend { delete: true, create_dir: true, list: true, + shared: true, ..Default::default() }); diff --git a/core/src/services/oss/backend.rs index 9ede2b0d3e43..0806c3767245 100644 --- a/core/src/services/oss/backend.rs +++ b/core/src/services/oss/backend.rs @@ -446,6 +446,9 @@ impl Access for OssBackend { write_with_cache_control: true, write_with_content_type: true, write_with_content_disposition: true, + // TODO: set this to false once versioning has been enabled. + write_with_if_not_exists: true, + // The min multipart size of OSS is 100 KiB.
// // ref: @@ -477,6 +480,8 @@ impl Access for OssBackend { batch: true, batch_max_operations: Some(self.core.batch_max_operations), + shared: true, + ..Default::default() }); diff --git a/core/src/services/oss/core.rs index 2a471ef8ce10..480ab13987d1 100644 --- a/core/src/services/oss/core.rs +++ b/core/src/services/oss/core.rs @@ -41,6 +41,7 @@ use serde::Deserialize; use serde::Serialize; use crate::raw::*; +use crate::services::oss::core::constants::X_OSS_FORBID_OVERWRITE; use crate::*; pub mod constants { @@ -48,6 +49,8 @@ pub mod constants { pub const X_OSS_SERVER_SIDE_ENCRYPTION_KEY_ID: &str = "x-oss-server-side-encryption-key-id"; + pub const X_OSS_FORBID_OVERWRITE: &str = "x-oss-forbid-overwrite"; + pub const RESPONSE_CONTENT_DISPOSITION: &str = "response-content-disposition"; pub const OSS_QUERY_VERSION_ID: &str = "versionId"; @@ -181,6 +184,20 @@ impl OssCore { req = req.header(CACHE_CONTROL, cache_control); } + // TODO: disable `if_not_exists` once versioning has been enabled. + // + // Specifies whether the object that is uploaded by calling the PutObject operation + // overwrites the existing object that has the same name. When versioning is enabled + // or suspended for the bucket to which you want to upload the object, the + // x-oss-forbid-overwrite header does not take effect. In this case, the object that + // is uploaded by calling the PutObject operation overwrites the existing object that + // has the same name. + // + // ref: https://www.alibabacloud.com/help/en/oss/developer-reference/putobject?spm=a2c63.p38356.0.0.39ef75e93o0Xtz + if args.if_not_exists() { + req = req.header(X_OSS_FORBID_OVERWRITE, "true"); + } + if let Some(user_metadata) = args.user_metadata() { for (key, value) in user_metadata { // before insert user defined metadata header, add prefix to the header name diff --git a/core/src/services/oss/error.rs index 15eb26d430b0..3cfeb66b0c25 100644 --- a/core/src/services/oss/error.rs +++ b/core/src/services/oss/error.rs @@ -42,7 +42,7 @@ pub(super) fn parse_error(resp: Response) -> Error { let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), StatusCode::FORBIDDEN => (ErrorKind::PermissionDenied, false), - StatusCode::PRECONDITION_FAILED | StatusCode::NOT_MODIFIED => { + StatusCode::PRECONDITION_FAILED | StatusCode::NOT_MODIFIED | StatusCode::CONFLICT => { (ErrorKind::ConditionNotMatch, false) } StatusCode::INTERNAL_SERVER_ERROR diff --git a/core/src/services/pcloud/backend.rs index ffc9b5c55828..4702d8778b03 100644 --- a/core/src/services/pcloud/backend.rs +++ b/core/src/services/pcloud/backend.rs @@ -216,6 +216,8 @@ impl Access for PcloudBackend { list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/pcloud/lister.rs index 86ade4dd580e..c3486b68a88d 100644 --- a/core/src/services/pcloud/lister.rs +++ b/core/src/services/pcloud/lister.rs @@ -84,10 +84,10 @@ impl oio::PageList for PcloudLister { return Ok(()); } - return Err(Error::new( + Err(Error::new( ErrorKind::Unexpected, String::from_utf8_lossy(&bs.to_bytes()), - )); + )) } _ => Err(parse_error(resp)), } diff --git a/core/src/services/persy/backend.rs index c9234f1878cb..8ff8bad67f4e 100644 --- a/core/src/services/persy/backend.rs +++ b/core/src/services/persy/backend.rs @@ -163,6 +163,7 @@ impl kv::Adapter for Adapter { write: true, delete: true,
blocking: true, + shared: false, ..Default::default() }, ) diff --git a/core/src/services/postgresql/backend.rs index 72993c3af6a1..ab45709090ab 100644 --- a/core/src/services/postgresql/backend.rs +++ b/core/src/services/postgresql/backend.rs @@ -196,6 +196,7 @@ impl kv::Adapter for Adapter { Capability { read: true, write: true, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/redb/backend.rs index b0722f824cec..b8951ac1880d 100644 --- a/core/src/services/redb/backend.rs +++ b/core/src/services/redb/backend.rs @@ -121,6 +121,7 @@ impl kv::Adapter for Adapter { read: true, write: true, blocking: true, + shared: false, ..Default::default() }, ) diff --git a/core/src/services/redis/backend.rs index 19afe34c7655..c9eb204268e3 100644 --- a/core/src/services/redis/backend.rs +++ b/core/src/services/redis/backend.rs @@ -336,6 +336,7 @@ impl kv::Adapter for Adapter { Capability { read: true, write: true, + shared: true, ..Default::default() }, diff --git a/core/src/services/rocksdb/backend.rs index 8dc3c6d2a2ba..3092bddef684 100644 --- a/core/src/services/rocksdb/backend.rs +++ b/core/src/services/rocksdb/backend.rs @@ -119,6 +119,7 @@ impl kv::Adapter for Adapter { write: true, list: true, blocking: true, + shared: false, ..Default::default() }, ) diff --git a/core/src/services/s3/backend.rs index 7b2bbd6b2604..654b28da06ee 100644 --- a/core/src/services/s3/backend.rs +++ b/core/src/services/s3/backend.rs @@ -547,6 +547,12 @@ impl S3Builder { self } + /// Disable write with if match so that opendal will not send write requests with if match headers. + pub fn disable_write_with_if_match(mut self) -> Self { + self.config.disable_write_with_if_match = true; + self + } + /// Detect region of S3 bucket. /// /// # Args @@ -877,6 +883,7 @@ impl Builder for S3Builder { client, batch_max_operations, checksum_algorithm, + disable_write_with_if_match: self.config.disable_write_with_if_match, }), }) } @@ -925,6 +932,7 @@ impl Access for S3Backend { write_can_multi: true, write_with_cache_control: true, write_with_content_type: true, + write_with_if_match: !self.core.disable_write_with_if_match, write_with_if_not_exists: true, write_with_user_metadata: true, @@ -960,6 +968,8 @@ impl Access for S3Backend { batch: true, batch_max_operations: Some(self.core.batch_max_operations), + shared: true, + ..Default::default() }); diff --git a/core/src/services/s3/config.rs index b1b31d5bbc72..cbcef7a207d6 100644 --- a/core/src/services/s3/config.rs +++ b/core/src/services/s3/config.rs @@ -173,6 +173,10 @@ pub struct S3Config { /// Available options: /// - "crc32c" pub checksum_algorithm: Option, + /// Disable write with if match so that opendal will not send write requests with if match headers. + /// + /// For example, Ceph RADOS S3 doesn't support write with if match.
+ pub disable_write_with_if_match: bool, } impl Debug for S3Config { diff --git a/core/src/services/s3/core.rs b/core/src/services/s3/core.rs index bc93b46e34b2..6cf689da01f9 100644 --- a/core/src/services/s3/core.rs +++ b/core/src/services/s3/core.rs @@ -98,6 +98,7 @@ pub struct S3Core { pub client: HttpClient, pub batch_max_operations: usize, pub checksum_algorithm: Option<ChecksumAlgorithm>, + pub disable_write_with_if_match: bool, } impl Debug for S3Core { @@ -455,6 +456,14 @@ impl S3Core { req = req.header(CACHE_CONTROL, cache_control) } + if let Some(if_match) = args.if_match() { + req = req.header(IF_MATCH, if_match); + } + + if args.if_not_exists() { + req = req.header(IF_NONE_MATCH, "*"); + } + // Set storage class header if let Some(v) = &self.default_storage_class { req = req.header(HeaderName::from_static(constants::X_AMZ_STORAGE_CLASS), v); @@ -476,10 +485,6 @@ impl S3Core { req = self.insert_checksum_header(req, &checksum); } - if args.if_not_exists() { - req = req.header(IF_NONE_MATCH, "*"); - } - // Set body let req = req.body(body).map_err(new_request_build_error)?; @@ -764,7 +769,10 @@ impl S3Core { self.send(req).await } - pub async fn s3_delete_objects(&self, paths: Vec<String>) -> Result<Response<Buffer>> { + pub async fn s3_delete_objects( + &self, + paths: Vec<(String, OpDelete)>, + ) -> Result<Response<Buffer>> { let url = format!("{}/?delete", self.endpoint); let req = Request::post(&url); @@ -772,8 +780,9 @@ let content = quick_xml::se::to_string(&DeleteObjectsRequest { object: paths .into_iter() - .map(|path| DeleteObjectsRequestObject { + .map(|(path, op)| DeleteObjectsRequestObject { key: build_abs_path(&self.root, &path), + version_id: op.version().map(|v| v.to_owned()), }) .collect(), }) @@ -904,6 +913,8 @@ pub struct DeleteObjectsRequest { #[serde(rename_all = "PascalCase")] pub struct DeleteObjectsRequestObject { pub key: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub version_id: Option<String>, } /// Result of DeleteObjects. @@ -1088,9 +1099,11 @@ mod tests { object: vec![ DeleteObjectsRequestObject { key: "sample1.txt".to_string(), + version_id: None, }, DeleteObjectsRequestObject { key: "sample2.txt".to_string(), + version_id: Some("11111".to_owned()), }, ], }; @@ -1105,6 +1118,7 @@ mod tests { <Key>sample2.txt</Key> + <VersionId>11111</VersionId> "# // Cleanup space and new line diff --git a/core/src/services/s3/docs.md b/core/src/services/s3/docs.md index 4027f95cac2c..1bae73dedfcd 100644 --- a/core/src/services/s3/docs.md +++ b/core/src/services/s3/docs.md @@ -28,8 +28,9 @@ This service can be used to: - `server_side_encryption_customer_algorithm`: Set the server_side_encryption_customer_algorithm for backend. - `server_side_encryption_customer_key`: Set the server_side_encryption_customer_key for backend. - `server_side_encryption_customer_key_md5`: Set the server_side_encryption_customer_key_md5 for backend. -- `disable_config_load`: Disable aws config load from env +- `disable_config_load`: Disable aws config load from env. - `enable_virtual_host_style`: Enable virtual host style. +- `disable_write_with_if_match`: Disable write with if match. Refer to [`S3Builder`]'s public API docs for more information.
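Some S3-compatible services (Ceph RADOS gateway, per the config doc above) reject `If-Match` on writes, hence the new opt-out. A sketch of wiring it up through the builder; the bucket, region, and endpoint values are placeholders:

```rust
use opendal::{services::S3, Operator, Result};

/// Build an S3 operator for a service that rejects If-Match on writes.
fn build_op() -> Result<Operator> {
    let builder = S3::default()
        .bucket("test")
        .region("us-east-1")
        .endpoint("http://127.0.0.1:8080") // e.g. a Ceph RADOS gateway
        .disable_write_with_if_match();

    let op = Operator::new(builder)?.finish();
    // The capability now reports the feature as unavailable, so callers
    // can check before issuing conditional writes.
    assert!(!op.info().full_capability().write_with_if_match);
    Ok(op)
}
```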
diff --git a/core/src/services/seafile/backend.rs b/core/src/services/seafile/backend.rs index 74ee07a89bb7..a8a1f98e3efa 100644 --- a/core/src/services/seafile/backend.rs +++ b/core/src/services/seafile/backend.rs @@ -235,6 +235,8 @@ impl Access for SeafileBackend { list: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/sftp/backend.rs b/core/src/services/sftp/backend.rs index 022700eb2922..5adf389ea9dd 100644 --- a/core/src/services/sftp/backend.rs +++ b/core/src/services/sftp/backend.rs @@ -347,6 +347,8 @@ impl Access for SftpBackend { copy: self.copyable, rename: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/sled/backend.rs b/core/src/services/sled/backend.rs index 64c4c367581e..308859945d7a 100644 --- a/core/src/services/sled/backend.rs +++ b/core/src/services/sled/backend.rs @@ -148,6 +148,7 @@ impl kv::Adapter for Adapter { write: true, list: true, blocking: true, + shared: false, ..Default::default() }, ) diff --git a/core/src/services/sqlite/backend.rs b/core/src/services/sqlite/backend.rs index 6f9c1aa15530..a6bfeb35da06 100644 --- a/core/src/services/sqlite/backend.rs +++ b/core/src/services/sqlite/backend.rs @@ -233,6 +233,7 @@ impl kv::Adapter for Adapter { write: true, delete: true, list: true, + shared: false, ..Default::default() }, ) diff --git a/core/src/services/supabase/backend.rs b/core/src/services/supabase/backend.rs index bbc57ce5af7e..56ddaabdfc67 100644 --- a/core/src/services/supabase/backend.rs +++ b/core/src/services/supabase/backend.rs @@ -167,6 +167,8 @@ impl Access for SupabaseBackend { write: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/surrealdb/backend.rs b/core/src/services/surrealdb/backend.rs index d7de77252683..3026a9c4e796 100644 --- a/core/src/services/surrealdb/backend.rs +++ b/core/src/services/surrealdb/backend.rs @@ -292,6 +292,7 @@ impl kv::Adapter for Adapter { Capability { read: true, write: true, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/swift/backend.rs b/core/src/services/swift/backend.rs index 5c3b95275c7d..12d2cb123311 100644 --- a/core/src/services/swift/backend.rs +++ b/core/src/services/swift/backend.rs @@ -199,6 +199,8 @@ impl Access for SwiftBackend { list: true, list_with_recursive: true, + shared: true, + ..Default::default() }); am.into() diff --git a/core/src/services/swift/error.rs b/core/src/services/swift/error.rs index c9880194b53b..73afa41645c7 100644 --- a/core/src/services/swift/error.rs +++ b/core/src/services/swift/error.rs @@ -61,10 +61,10 @@ pub(super) fn parse_error(resp: Response<Buffer>) -> Error { } fn parse_error_response(resp: &Bytes) -> String { - return match de::from_reader::<_, ErrorResponse>(resp.clone().reader()) { + match de::from_reader::<_, ErrorResponse>(resp.clone().reader()) { Ok(swift_err) => swift_err.p, Err(_) => String::from_utf8_lossy(resp).into_owned(), - }; + } } #[cfg(test)] diff --git a/core/src/services/tikv/backend.rs b/core/src/services/tikv/backend.rs index 5d37b526d5db..e6f343943ac0 100644 --- a/core/src/services/tikv/backend.rs +++ b/core/src/services/tikv/backend.rs @@ -195,6 +195,7 @@ impl kv::Adapter for Adapter { read: true, write: true, blocking: false, + shared: true, ..Default::default() }, ) diff --git a/core/src/services/upyun/backend.rs b/core/src/services/upyun/backend.rs index e703428fdfba..4373d357c21b 100644 --- a/core/src/services/upyun/backend.rs +++ b/core/src/services/upyun/backend.rs @@ -226,6 +226,8 @@ impl Access for UpyunBackend { list: true,
list_with_limit: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/vercel_artifacts/backend.rs b/core/src/services/vercel_artifacts/backend.rs index 6069d911ac16..638afed26b21 100644 --- a/core/src/services/vercel_artifacts/backend.rs +++ b/core/src/services/vercel_artifacts/backend.rs @@ -63,6 +63,8 @@ impl Access for VercelArtifactsBackend { write: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/vercel_blob/backend.rs b/core/src/services/vercel_blob/backend.rs index 7cb169689a7e..53ea1f5ef500 100644 --- a/core/src/services/vercel_blob/backend.rs +++ b/core/src/services/vercel_blob/backend.rs @@ -172,6 +172,8 @@ impl Access for VercelBlobBackend { list: true, list_with_limit: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 5361de7c572c..81f2bfb0ad2d 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -234,6 +234,8 @@ impl Access for WebdavBackend { list: true, // We already support recursive list but some details still need polish. // list_with_recursive: true, + shared: true, + ..Default::default() }); diff --git a/core/src/services/webhdfs/backend.rs b/core/src/services/webhdfs/backend.rs index b10873a0b7ab..4b6757d3bae9 100644 --- a/core/src/services/webhdfs/backend.rs +++ b/core/src/services/webhdfs/backend.rs @@ -172,10 +172,7 @@ impl Builder for WebhdfsBuilder { let atomic_write_dir = self.config.atomic_write_dir; - let auth = self .config .delegation .map(|dt| format!("delegation_token={dt}")); + let auth = self.config.delegation.map(|dt| format!("delegation={dt}")); let client = HttpClient::new()?; @@ -542,6 +539,8 @@ impl Access for WebhdfsBackend { list: true, + shared: true, + ..Default::default() }); am.into() diff --git a/core/src/services/yandex_disk/backend.rs b/core/src/services/yandex_disk/backend.rs index f6506d14a0de..491efd9f8104 100644 --- a/core/src/services/yandex_disk/backend.rs +++ b/core/src/services/yandex_disk/backend.rs @@ -176,6 +176,8 @@ impl Access for YandexDiskBackend { list: true, list_with_limit: true, + shared: true, + ..Default::default() }); diff --git a/core/src/types/buffer.rs b/core/src/types/buffer.rs index b4124a920147..2fccda3ad0d5 100644 --- a/core/src/types/buffer.rs +++ b/core/src/types/buffer.rs @@ -287,10 +287,19 @@ impl Buffer { pub fn to_bytes(&self) -> Bytes { match &self.0 { Inner::Contiguous(bytes) => bytes.clone(), - Inner::NonContiguous { .. } => { - let mut ret = BytesMut::with_capacity(self.len()); - ret.put(self.clone()); - ret.freeze() + Inner::NonContiguous { + parts, + size, + idx: _, + offset, + } => { + if parts.len() == 1 { + parts[0].slice(*offset..(*offset + *size)) + } else { + let mut ret = BytesMut::with_capacity(self.len()); + ret.put(self.clone()); + ret.freeze() + } } } } diff --git a/core/src/types/capability.rs b/core/src/types/capability.rs index fd3191de9cfd..f8b87d28a393 100644 --- a/core/src/types/capability.rs +++ b/core/src/types/capability.rs @@ -128,6 +128,8 @@ pub struct Capability { pub write_with_content_disposition: bool, /// Indicates if Cache-Control can be specified during write operations. pub write_with_cache_control: bool, + /// Indicates if conditional write operations using If-Match are supported. + pub write_with_if_match: bool, /// Indicates if conditional write operations using If-None-Match are supported.
pub write_with_if_none_match: bool, /// Indicates if write operations can be conditional on object non-existence. @@ -207,6 +209,9 @@ pub struct Capability { /// Maximum number of operations supported in a single batch. pub batch_max_operations: Option<usize>, + /// Indicates if the operator supports shared access. + pub shared: bool, + /// Indicates if blocking operations are supported. pub blocking: bool, } @@ -226,6 +231,9 @@ impl Debug for Capability { if self.presign { f.write_str("| Presign")?; } + if self.shared { + f.write_str("| Shared")?; + } if self.blocking { f.write_str("| Blocking")?; } }
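The new `shared` capability separates backends that several processes can point at simultaneously (network-backed services such as s3, redis, or postgresql are marked `shared: true` above) from embedded engines that lock their data files (persy, redb, rocksdb, sled, sqlite are marked `shared: false`). A small guard sketch; the helper name is illustrative:

```rust
use opendal::Operator;

/// Returns true if this operator's backend may be accessed by multiple
/// processes at once, per the `shared` capability introduced above.
/// Embedded engines such as redb or rocksdb report false, so pointing
/// two processes at the same data directory is not safe for them.
fn is_multi_process_safe(op: &Operator) -> bool {
    op.info().full_capability().shared
}
```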
diff --git a/core/src/types/entry.rs b/core/src/types/entry.rs index 9fe4510806fe..cdb70e91b6f3 100644 --- a/core/src/types/entry.rs +++ b/core/src/types/entry.rs @@ -62,7 +62,7 @@ impl Entry { &self.metadata } - /// Consume this entry to get it's path and metadata. + /// Consume this entry to get its path and metadata. pub fn into_parts(self) -> (String, Metadata) { (self.path, self.metadata) } } diff --git a/core/src/types/operator/builder.rs b/core/src/types/operator/builder.rs index 95dfa1c17a1e..4393cd5e0206 100644 --- a/core/src/types/operator/builder.rs +++ b/core/src/types/operator/builder.rs @@ -472,6 +472,7 @@ impl OperatorBuilder { OperatorBuilder { accessor } .layer(ErrorContextLayer) .layer(CompleteLayer) + .layer(CorrectnessCheckLayer) } /// Create a new layer with static dispatch. diff --git a/core/src/types/operator/operator.rs b/core/src/types/operator/operator.rs index 7b2351a8e174..3f69a5cb3390 100644 --- a/core/src/types/operator/operator.rs +++ b/core/src/types/operator/operator.rs @@ -246,6 +246,24 @@ impl Operator { /// # } /// ``` /// + /// ## `version` + /// + /// Set `version` for this `stat` request. + /// + /// This feature can be used to retrieve the metadata of a specific version of the given path. + /// + /// If the version doesn't exist, an error with kind [`ErrorKind::NotFound`] will be returned. + /// + /// ```no_run + /// # use opendal::Result; + /// # use opendal::Operator; + /// + /// # async fn test(op: Operator, version: &str) -> Result<()> { + /// let metadata = op.stat_with("path/to/file").version(version).await?; + /// # Ok(()) + /// # } + /// ``` + /// /// # Examples /// /// ## Get metadata while `ETag` matches @@ -548,6 +566,24 @@ impl Operator { /// # } /// ``` /// + /// ## `version` + /// + /// Set `version` for this `read` request. + /// + /// This feature can be used to retrieve the data of a specified version of the given path. + /// + /// If the version doesn't exist, an error with kind [`ErrorKind::NotFound`] will be returned. + /// + /// ```no_run + /// # use opendal::Result; + /// # use opendal::Operator; + /// + /// # async fn test(op: Operator, version: &str) -> Result<()> { + /// let bs = op.read_with("path/to/file").version(version).await?; + /// # Ok(()) + /// # } + /// ``` + /// /// # Examples /// /// Read the whole path into a bytes. @@ -667,6 +703,24 @@ impl Operator { /// # } /// ``` /// + /// ## `version` + /// + /// Set `version` for this `reader`. + /// + /// This feature can be used to retrieve the data of a specified version of the given path. + /// + /// If the version doesn't exist, an error with kind [`ErrorKind::NotFound`] will be returned. + /// + /// ```no_run + /// # use opendal::Result; + /// # use opendal::Operator; + /// + /// # async fn test(op: Operator, version: &str) -> Result<()> { + /// let reader = op.reader_with("path/to/file").version(version).await?; + /// # Ok(()) + /// # } + /// ``` + /// /// # Examples /// /// ```no_run @@ -1420,6 +1474,36 @@ impl Operator { /// # Ok(()) /// # } /// ``` + /// + /// ## `if_match` + /// + /// Sets an `if_match` condition with the specified ETag for this write request. + /// + /// ### Capability + /// + /// Check [`Capability::write_with_if_match`] before using this feature. + /// + /// ### Behavior + /// + /// - If the target file's ETag matches the specified one, proceeds with the write operation. + /// - If the target file's ETag does not match the specified one, returns [`ErrorKind::ConditionNotMatch`]. + /// + /// This operation will succeed when the target's ETag matches the specified one, + /// providing a way for conditional writes. + /// + /// ### Example + /// + /// ```no_run + /// # use opendal::{ErrorKind, Result}; + /// use opendal::Operator; + /// # async fn test(op: Operator, incorrect_etag: &str) -> Result<()> { + /// let bs = b"hello, world!".to_vec(); + /// let res = op.write_with("path/to/file", bs).if_match(incorrect_etag).await; + /// assert!(res.is_err()); + /// assert_eq!(res.unwrap_err().kind(), ErrorKind::ConditionNotMatch); + /// # Ok(()) + /// # } + /// ``` pub fn write_with( &self, path: &str, @@ -1482,6 +1566,26 @@ impl Operator { /// /// - Deleting a file that does not exist won't return errors. /// + /// # Options + /// + /// ## `version` + /// + /// Set `version` for this `delete` request. + /// + /// Remove a specific version of the given path. + /// + /// If the version doesn't exist, OpenDAL will not return errors. + /// + /// ```no_run + /// # use opendal::Result; + /// # use opendal::Operator; + /// + /// # async fn test(op: Operator, version: &str) -> Result<()> { + /// op.delete_with("path/to/file").version(version).await?; + /// # Ok(()) + /// # } + ///``` + /// /// # Examples /// /// ``` @@ -1825,6 +1929,22 @@ impl Operator { /// # } /// ``` /// + /// ## `version` + /// + /// Specify whether to list files along with all their versions. + /// + /// If `version` is enabled, all file versions will be returned; otherwise, + /// only the current files will be returned. + /// + /// ```no_run + /// # use opendal::Result; + /// # use opendal::Operator; + /// # async fn test(op: Operator) -> Result<()> { + /// let entries = op.list_with("path/to/dir/").version(true).await?; + /// # Ok(()) + /// # } + /// ``` + /// /// # Examples /// /// ## List all entries recursively @@ -1975,6 +2095,22 @@ impl Operator { /// # } /// ``` /// + /// ## `version` + /// + /// Specify whether to list files along with all their versions. + /// + /// If `version` is enabled, all file versions will be returned; otherwise, + /// only the current files will be returned.
+ /// + /// ```no_run + /// # use opendal::Result; + /// # use opendal::Operator; + /// # async fn test(op: Operator) -> Result<()> { + /// let entries = op.lister_with("path/to/dir/").version(true).await?; + /// # Ok(()) + /// # } + /// ``` + /// /// # Examples /// /// ## List all files recursively diff --git a/core/src/types/operator/operator_futures.rs b/core/src/types/operator/operator_futures.rs index 23cb86d1ba22..de8f277ff284 100644 --- a/core/src/types/operator/operator_futures.rs +++ b/core/src/types/operator/operator_futures.rs @@ -323,6 +323,11 @@ impl<F: Future<Output = Result<()>>> FutureWrite<F> { self.map(|(args, options, bs)| (args.with_executor(executor), options, bs)) } + /// Set the If-Match for this operation. + pub fn if_match(self, s: &str) -> Self { + self.map(|(args, options, bs)| (args.with_if_match(s), options, bs)) + } + /// Set the If-None-Match for this operation. pub fn if_none_match(self, s: &str) -> Self { self.map(|(args, options, bs)| (args.with_if_none_match(s), options, bs)) diff --git a/core/tests/behavior/async_write.rs b/core/tests/behavior/async_write.rs index b51cc2234f38..4540925018d0 100644 --- a/core/tests/behavior/async_write.rs +++ b/core/tests/behavior/async_write.rs @@ -46,6 +46,7 @@ pub fn tests(op: &Operator, tests: &mut Vec<Trial>) { test_write_with_content_disposition, test_write_with_if_none_match, test_write_with_if_not_exists, + test_write_with_if_match, test_write_with_user_metadata, test_writer_write, test_writer_write_with_overwrite, @@ -674,3 +675,41 @@ pub async fn test_write_with_if_not_exists(op: Operator) -> Result<()> { Ok(()) } + +/// Writing a file with `if_match` will get a `ConditionNotMatch` error if the file's etag does not match. +pub async fn test_write_with_if_match(op: Operator) -> Result<()> { + if !op.info().full_capability().write_with_if_match { + return Ok(()); + } + + // Create two different files with different content + let (path_a, content_a, _) = TEST_FIXTURE.new_file(op.clone()); + let (path_b, content_b, _) = TEST_FIXTURE.new_file(op.clone()); + + // Write initial content to both files + op.write(&path_a, content_a.clone()).await?; + op.write(&path_b, content_b.clone()).await?; + + // Get etags for both files + let meta_a = op.stat(&path_a).await?; + let etag_a = meta_a.etag().expect("etag must exist"); + let meta_b = op.stat(&path_b).await?; + let etag_b = meta_b.etag().expect("etag must exist"); + + // Should succeed: Writing to path_a with its own etag + let res = op .write_with(&path_a, content_a.clone()) .if_match(etag_a) .await; + assert!(res.is_ok()); + + // Should fail: Writing to path_a with path_b's etag + let res = op .write_with(&path_a, content_a.clone()) .if_match(etag_b) .await; + assert!(res.is_err()); + assert_eq!(res.unwrap_err().kind(), ErrorKind::ConditionNotMatch); + + Ok(()) +}
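The behavior test above exercises the failure path; the typical success-path use of `if_match` is optimistic concurrency: stat to learn the current etag, write back conditionally, and retry on `ConditionNotMatch`. A sketch under that pattern; the helper name and unbounded retry policy are illustrative, not part of this change:

```rust
use opendal::{ErrorKind, Operator, Result};

/// Read-modify-write with optimistic locking: if another writer changes
/// the object between our read and our conditional write, the write
/// fails with ConditionNotMatch and we reload and try again.
async fn append_line(op: &Operator, path: &str, line: &str) -> Result<()> {
    loop {
        let etag = op.stat(path).await?.etag().map(str::to_string);
        let mut content = op.read(path).await?.to_bytes().to_vec();
        content.extend_from_slice(line.as_bytes());

        let mut write = op.write_with(path, content);
        if let Some(etag) = &etag {
            write = write.if_match(etag);
        }
        match write.await {
            Ok(_) => return Ok(()),
            // Someone else won the race; reload and retry.
            Err(e) if e.kind() == ErrorKind::ConditionNotMatch => continue,
            Err(e) => return Err(e),
        }
    }
}
```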
diff --git a/integrations/cloud_filter/tests/behavior/main.rs b/integrations/cloud_filter/tests/behavior/main.rs index 0b2cd371731e..8f73d9b2678d 100644 --- a/integrations/cloud_filter/tests/behavior/main.rs +++ b/integrations/cloud_filter/tests/behavior/main.rs @@ -43,6 +43,8 @@ const PROVIDER_NAME: &str = "ro-cloud_filter"; const DISPLAY_NAME: &str = "Test Cloud Filter"; const ROOT_PATH: &str = "C:\\sync_root"; +type Callback = Pin>>; + #[tokio::main] async fn main() -> ExitCode { let args = Arguments::from_args(); @@ -80,7 +82,7 @@ fn init( op: Operator, ) -> ( SyncRootId, - Connection>>)>>, + Connection>, ) { let sync_root_id = SyncRootIdBuilder::new(PROVIDER_NAME) .user_security_id(SecurityId::current_user().unwrap()) diff --git a/integrations/dav-server/src/fs.rs b/integrations/dav-server/src/fs.rs index 80444d8c676f..449b562c1186 100644 --- a/integrations/dav-server/src/fs.rs +++ b/integrations/dav-server/src/fs.rs @@ -73,7 +73,7 @@ impl DavFileSystem for OpendalFs { &'a self, path: &'a DavPath, options: dav_server::fs::OpenOptions, - ) -> FsFuture<Box<dyn DavFile>> { + ) -> FsFuture<'a, Box<dyn DavFile>> { async move { let file = OpendalFile::open(self.op.clone(), path.clone(), options).await?; Ok(Box::new(file) as Box<dyn DavFile>) @@ -85,7 +85,7 @@ impl DavFileSystem for OpendalFs { &'a self, path: &'a DavPath, _meta: ReadDirMeta, - ) -> FsFuture<FsStream<Box<dyn DavDirEntry>>> { + ) -> FsFuture<'a, FsStream<Box<dyn DavDirEntry>>> { async move { let path = path.as_url_string(); self.op @@ -97,7 +97,7 @@ impl DavFileSystem for OpendalFs { .boxed() } - fn metadata<'a>(&'a self, path: &'a DavPath) -> FsFuture<Box<dyn DavMetaData>> { + fn metadata<'a>(&'a self, path: &'a DavPath) -> FsFuture<'a, Box<dyn DavMetaData>> { async move { let opendal_metadata = self.op.stat(path.as_url_string().as_str()).await; match opendal_metadata { @@ -111,7 +111,7 @@ impl DavFileSystem for OpendalFs { .boxed() } - fn create_dir<'a>(&'a self, path: &'a DavPath) -> FsFuture<()> { + fn create_dir<'a>(&'a self, path: &'a DavPath) -> FsFuture<'a, ()> { async move { let path = path.as_url_string(); @@ -150,11 +150,11 @@ impl DavFileSystem for OpendalFs { .boxed() } - fn remove_dir<'a>(&'a self, path: &'a DavPath) -> FsFuture<()> { + fn remove_dir<'a>(&'a self, path: &'a DavPath) -> FsFuture<'a, ()> { self.remove_file(path) } - fn remove_file<'a>(&'a self, path: &'a DavPath) -> FsFuture<()> { + fn remove_file<'a>(&'a self, path: &'a DavPath) -> FsFuture<'a, ()> { async move { self.op .delete(path.as_url_string().as_str()) @@ -164,7 +164,7 @@ impl DavFileSystem for OpendalFs { .boxed() } - fn rename<'a>(&'a self, from: &'a DavPath, to: &'a DavPath) -> FsFuture<()> { + fn rename<'a>(&'a self, from: &'a DavPath, to: &'a DavPath) -> FsFuture<'a, ()> { async move { let from_path = from .as_rel_ospath() @@ -182,7 +182,7 @@ impl DavFileSystem for OpendalFs { .boxed() } - fn copy<'a>(&'a self, from: &'a DavPath, to: &'a DavPath) -> FsFuture<()> { + fn copy<'a>(&'a self, from: &'a DavPath, to: &'a DavPath) -> FsFuture<'a, ()> { async move { let from_path = from .as_rel_ospath() diff --git a/integrations/unftp-sbe/src/lib.rs b/integrations/unftp-sbe/src/lib.rs index 4f99195461df..415c5bc5e76a 100644 --- a/integrations/unftp-sbe/src/lib.rs +++ b/integrations/unftp-sbe/src/lib.rs @@ -222,10 +222,11 @@ impl StorageBackend<User> for OpendalStorage { } async fn mkd<P: AsRef<Path> + Send + Debug>(&self, _: &User, path: P) -> storage::Result<()> { - self.op - .create_dir(convert_path(path.as_ref())?) - .await - .map_err(convert_err) + let mut path_str = convert_path(path.as_ref())?.to_string(); + if !path_str.ends_with('/') { + path_str.push('/'); + } + self.op.create_dir(&path_str).await.map_err(convert_err) } async fn rename<P: AsRef<Path> + Send + Debug>(
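The `mkd` fix above reflects an OpenDAL convention worth calling out: directories are distinguished from files purely by a trailing `/`, and `create_dir` expects the directory form. A sketch of the same normalization in application code; the helper name is illustrative:

```rust
use opendal::{Operator, Result};

/// OpenDAL treats "path/to/dir" and "path/to/dir/" as different entries;
/// create_dir wants the trailing slash, as the unftp-sbe fix above ensures.
async fn mkdir(op: &Operator, path: &str) -> Result<()> {
    let dir = if path.ends_with('/') {
        path.to_string()
    } else {
        format!("{path}/")
    };
    op.create_dir(&dir).await
}
```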
diff --git a/integrations/virtiofs/src/buffer.rs b/integrations/virtiofs/src/buffer.rs index 0cd8d1bb0e65..09e42498dd5c 100644 --- a/integrations/virtiofs/src/buffer.rs +++ b/integrations/virtiofs/src/buffer.rs @@ -30,7 +30,7 @@ pub trait ReadWriteAtVolatile<B: BitmapSlice> { fn write_vectored_at_volatile(&self, bufs: &[&VolatileSlice<B>]) -> Result<usize>; } -impl<'a, B: BitmapSlice, T: ReadWriteAtVolatile<B> + ?Sized> ReadWriteAtVolatile<B> for &'a T { +impl<B: BitmapSlice, T: ReadWriteAtVolatile<B> + ?Sized> ReadWriteAtVolatile<B> for &T { fn read_vectored_at_volatile(&self, bufs: &[&VolatileSlice<B>]) -> Result<usize> { (**self).read_vectored_at_volatile(bufs) } diff --git a/integrations/virtiofs/src/virtiofs_util.rs b/integrations/virtiofs/src/virtiofs_util.rs index cf0ab041a5eb..9d390da97532 100644 --- a/integrations/virtiofs/src/virtiofs_util.rs +++ b/integrations/virtiofs/src/virtiofs_util.rs @@ -216,7 +216,7 @@ impl<'a, B: Bitmap + BitmapSlice + 'static> Reader<'a, B> { } } -impl<'a, B: BitmapSlice> io::Read for Reader<'a, B> { +impl<B: BitmapSlice> io::Read for Reader<'_, B> { fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { self.buffer .consume(buf.len(), |bufs| { @@ -308,7 +308,7 @@ impl<'a, B: Bitmap + BitmapSlice + 'static> Writer<'a, B> { } } -impl<'a, B: BitmapSlice> Write for Writer<'a, B> { +impl<B: BitmapSlice> Write for Writer<'_, B> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> { self.buffer .consume(buf.len(), |bufs| { diff --git a/website/community/maturity.md b/website/community/maturity.md index 4724c9db550a..d2b30c6589ac 100644 --- a/website/community/maturity.md +++ b/website/community/maturity.md @@ -11,7 +11,7 @@ More details can be found [here](https://community.apache.org/apache-way/apache- ## Status of this assessment -This assessment is still working in progress. +This assessment was carried out during OpenDAL's graduation, which was completed on 2024-01-18. ## Maturity model assessment