diff --git a/.github/pr-title-checker-config.json b/.github/pr-title-checker-config.json index 958bbba56d2c5..2da13e6e86f5c 100644 --- a/.github/pr-title-checker-config.json +++ b/.github/pr-title-checker-config.json @@ -4,7 +4,7 @@ "color": "B60205" }, "CHECKS": { - "regexp": "^(cherry pick|cherry-pick)?(| )+(feat|fix|test|refactor|chore|style|doc|perf|build|ci|revert|deprecate)(\\(.*\\))?:.*", + "regexp": "^(cherry pick|cherry-pick)?(| |:|: )+(feat|fix|test|refactor|chore|style|doc|perf|build|ci|revert|deprecate)(\\(.*\\))?:.*", "ignoreLabels" : ["ignore-title"] }, "MESSAGES": { diff --git a/.github/workflows/cherry-pick-to-release-branch.yml b/.github/workflows/cherry-pick-to-release-branch.yml index 4e8fd60117731..18dba108047dd 100644 --- a/.github/workflows/cherry-pick-to-release-branch.yml +++ b/.github/workflows/cherry-pick-to-release-branch.yml @@ -22,8 +22,8 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - release_pull_request_1_8: - if: "contains(github.event.pull_request.labels.*.name, 'need-cherry-pick-release-1.8') && github.event.pull_request.merged == true" + release_pull_request_1_10: + if: "contains(github.event.pull_request.labels.*.name, 'need-cherry-pick-release-1.10') && github.event.pull_request.merged == true" runs-on: ubuntu-latest name: release_pull_request steps: @@ -32,9 +32,9 @@ jobs: - name: Create PR to branch uses: risingwavelabs/github-action-cherry-pick@master with: - pr_branch: 'release-1.8' + pr_branch: 'release-1.10' pr_labels: 'cherry-pick' - pr_body: ${{ format('Cherry picking \#{0} onto branch release-1.8', github.event.number) }} + pr_body: ${{ format('Cherry picking \#{0} onto branch release-1.10', github.event.number) }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/Cargo.lock b/Cargo.lock index 0ad318dc37dbf..3ccd821d4caa8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -241,7 +241,7 @@ dependencies = [ [[package]] name = "apache-avro" version = "0.16.0" -source = "git+https://github.com/risingwavelabs/avro?rev=5349b0c7b35940d117397edbd314ca9087cdb892#5349b0c7b35940d117397edbd314ca9087cdb892" +source = "git+https://github.com/risingwavelabs/avro?rev=25113ba88234a9ae23296e981d8302c290fdaa4b#25113ba88234a9ae23296e981d8302c290fdaa4b" dependencies = [ "bzip2", "crc32fast", @@ -599,7 +599,7 @@ dependencies = [ "arrow-schema 48.0.1", "chrono", "half 2.3.1", - "indexmap 2.0.0", + "indexmap 2.2.6", "lexical-core", "num", "serde", @@ -1560,7 +1560,7 @@ dependencies = [ "aws-smithy-types", "bytes", "http 0.2.9", - "http 1.0.0", + "http 1.1.0", "pin-project-lite", "tokio", "tracing", @@ -1663,7 +1663,7 @@ dependencies = [ "axum-core 0.4.3", "bytes", "futures-util", - "http 1.0.0", + "http 1.1.0", "http-body 1.0.0", "http-body-util", "hyper 1.1.0", @@ -1713,7 +1713,7 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http 1.0.0", + "http 1.1.0", "http-body 1.0.0", "http-body-util", "mime", @@ -1733,7 +1733,7 @@ checksum = "077959a7f8cf438676af90b483304528eb7e16eadadb7f44e9ada4f9dceb9e62" dependencies = [ "axum-core 0.4.3", "chrono", - "http 1.0.0", + "http 1.1.0", "mime_guess", "rust-embed", "tower-service", @@ -2100,6 +2100,26 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bson" +version = "2.11.0" +source = "git+https://github.com/risingwavelabs/bson-rust?rev=e5175ec#e5175ecfe3bebaaf58df543a1ae2bf868e476052" +dependencies = [ + "ahash 0.8.11", + "base64 0.13.1", + "bitvec", + "hex", + "indexmap 2.2.6", + "js-sys", + "once_cell", + "rand", + "serde", + "serde_bytes", + "serde_json", + "time", + "uuid", +] + 
[[package]] name = "bstr" version = "1.6.2" @@ -2971,9 +2991,9 @@ checksum = "338089f42c427b86394a5ee60ff321da23a5c89c9d89514c829687b26359fcff" [[package]] name = "crc32c" -version = "0.6.5" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89254598aa9b9fa608de44b3ae54c810f0f06d755e24c50177f1f8f31ff50ce2" +checksum = "0227b9f93e535d49bc7ce914c066243424ce85ed90864cebd0874b184e9b6947" dependencies = [ "rustc_version 0.4.0", ] @@ -3216,16 +3236,15 @@ dependencies = [ [[package]] name = "curve25519-dalek" -version = "4.1.2" +version = "4.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a677b8922c94e01bdbb12126b0bc852f00447528dee1782229af9c720c3f348" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" dependencies = [ "cfg-if", "cpufeatures", "curve25519-dalek-derive", "digest", "fiat-crypto", - "platforms", "rustc_version 0.4.0", "subtle", "zeroize", @@ -3465,7 +3484,7 @@ dependencies = [ "glob", "half 2.3.1", "hashbrown 0.14.3", - "indexmap 2.0.0", + "indexmap 2.2.6", "itertools 0.11.0", "log", "num_cpus", @@ -3578,7 +3597,7 @@ dependencies = [ "half 2.3.1", "hashbrown 0.14.3", "hex", - "indexmap 2.0.0", + "indexmap 2.2.6", "itertools 0.11.0", "libc", "log", @@ -3612,7 +3631,7 @@ dependencies = [ "futures", "half 2.3.1", "hashbrown 0.14.3", - "indexmap 2.0.0", + "indexmap 2.2.6", "itertools 0.11.0", "log", "once_cell", @@ -3890,7 +3909,7 @@ dependencies = [ "deno_core", "deno_tls", "dyn-clone", - "http 1.0.0", + "http 1.1.0", "pin-project", "reqwest 0.12.4", "serde", @@ -3914,7 +3933,7 @@ dependencies = [ "deno_net", "deno_websocket", "flate2", - "http 1.0.0", + "http 1.1.0", "httparse", "hyper 0.14.27", "hyper 1.1.0", @@ -4092,7 +4111,7 @@ dependencies = [ "deno_tls", "fastwebsockets", "h2 0.4.4", - "http 1.0.0", + "http 1.1.0", "http-body-util", "hyper 1.1.0", "hyper-util", @@ -4632,6 +4651,18 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-as-inner" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21cdad81446a7f7dc43f6a77409efeb9733d2fa65553efef6018ef257c959b73" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "enum-as-inner" version = "0.5.1" @@ -5652,7 +5683,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" dependencies = [ "fallible-iterator 0.3.0", - "indexmap 2.0.0", + "indexmap 2.2.6", "stable_deref_trait", ] @@ -5866,7 +5897,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.9", - "indexmap 2.0.0", + "indexmap 2.2.6", "slab", "tokio", "tokio-util", @@ -5884,8 +5915,8 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http 1.0.0", - "indexmap 2.0.0", + "http 1.1.0", + "indexmap 2.2.6", "slab", "tokio", "tokio-util", @@ -6076,9 +6107,9 @@ dependencies = [ [[package]] name = "http" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b32afd38673a8016f7c9ae69e5af41a58f81b1d31689040f2f1959594ce194ea" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", @@ -6103,7 +6134,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" dependencies = [ "bytes", - "http 1.0.0", + "http 1.1.0", ] [[package]] @@ -6114,7 +6145,7 @@ checksum = 
"0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" dependencies = [ "bytes", "futures-core", - "http 1.0.0", + "http 1.1.0", "http-body 1.0.0", "pin-project-lite", ] @@ -6177,7 +6208,7 @@ dependencies = [ "futures-channel", "futures-util", "h2 0.4.4", - "http 1.0.0", + "http 1.1.0", "http-body 1.0.0", "httparse", "httpdate", @@ -6211,7 +6242,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", - "http 1.0.0", + "http 1.1.0", "hyper 1.1.0", "hyper-util", "rustls 0.22.4", @@ -6271,7 +6302,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.0.0", + "http 1.1.0", "http-body 1.0.0", "hyper 1.1.0", "pin-project-lite", @@ -6347,7 +6378,7 @@ dependencies = [ "log", "murmur3", "once_cell", - "opendal", + "opendal 0.45.1", "ordered-float 3.9.1", "parquet 50.0.0", "prometheus", @@ -6357,7 +6388,7 @@ dependencies = [ "serde", "serde_bytes", "serde_json", - "serde_with", + "serde_with 3.8.0", "tokio", "toml 0.7.8", "url", @@ -6427,9 +6458,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.0.0" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -6468,7 +6499,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73c0fefcb6d409a6587c07515951495d482006f89a21daa0f2f783aa4fd5e027" dependencies = [ "ahash 0.8.11", - "indexmap 2.0.0", + "indexmap 2.2.6", "is-terminal", "itoa", "log", @@ -7167,7 +7198,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "http 1.0.0", + "http 1.1.0", "madsim", "spin 0.9.8", "tracing", @@ -7184,7 +7215,7 @@ dependencies = [ "http 0.2.9", "madsim", "serde", - "serde_with", + "serde_with 3.8.0", "spin 0.9.8", "thiserror", "tokio", @@ -7514,6 +7545,53 @@ dependencies = [ "uuid", ] +[[package]] +name = "mongodb" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef206acb1b72389b49bc9985efe7eb1f8a9bb18e5680d262fac26c07f44025f1" +dependencies = [ + "async-trait", + "base64 0.13.1", + "bitflags 1.3.2", + "bson", + "chrono", + "derivative", + "derive_more", + "futures-core", + "futures-executor", + "futures-io", + "futures-util", + "hex", + "hmac", + "lazy_static", + "md-5", + "pbkdf2 0.11.0", + "percent-encoding", + "rand", + "rustc_version_runtime", + "rustls 0.21.11", + "rustls-pemfile 1.0.4", + "serde", + "serde_bytes", + "serde_with 1.14.0", + "sha-1", + "sha2", + "socket2 0.4.9", + "stringprep", + "strsim 0.10.0", + "take_mut", + "thiserror", + "tokio", + "tokio-rustls 0.24.1", + "tokio-util", + "trust-dns-proto 0.21.2", + "trust-dns-resolver 0.21.2", + "typed-builder 0.10.0", + "uuid", + "webpki-roots 0.25.2", +] + [[package]] name = "more-asserts" version = "0.3.1" @@ -7976,7 +8054,7 @@ checksum = "d8dd6c0cdf9429bce006e1362bfce61fa1bfd8c898a643ed8d2b471934701d3d" dependencies = [ "crc32fast", "hashbrown 0.14.3", - "indexmap 2.0.0", + "indexmap 2.2.6", "memchr", ] @@ -8050,7 +8128,7 @@ dependencies = [ "percent-encoding", "prometheus", "quick-xml 0.31.0", - "reqsign", + "reqsign 0.14.9", "reqwest 0.11.20", "serde", "serde_json", @@ -8059,6 +8137,37 @@ dependencies = [ "uuid", ] +[[package]] +name = "opendal" +version = "0.47.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3ba698f2258bebdf7a3a38862bb6ef1f96d351627002686dacc228f805bdd6" +dependencies = [ + "anyhow", + "async-trait", + "backon", + "base64 0.22.0", + "bytes", + "chrono", + "crc32c", + "flagset", + "futures", + "getrandom", + "http 1.1.0", + "log", + "md-5", + "once_cell", + "percent-encoding", + "quick-xml 0.31.0", + "reqsign 0.15.2", + "reqwest 0.12.4", + "serde", + "serde_json", + "sha2", + "tokio", + "uuid", +] + [[package]] name = "openidconnect" version = "3.4.0" @@ -8084,7 +8193,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_plain", - "serde_with", + "serde_with 3.8.0", "sha2", "subtle", "thiserror", @@ -8143,7 +8252,7 @@ checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a" dependencies = [ "futures-core", "futures-sink", - "indexmap 2.0.0", + "indexmap 2.2.6", "js-sys", "once_cell", "pin-project-lite", @@ -8594,6 +8703,15 @@ dependencies = [ "prost-types 0.11.9", ] +[[package]] +name = "pbkdf2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" +dependencies = [ + "digest", +] + [[package]] name = "pbkdf2" version = "0.12.2" @@ -8636,7 +8754,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset 0.4.2", - "indexmap 2.0.0", + "indexmap 2.2.6", ] [[package]] @@ -8797,7 +8915,7 @@ dependencies = [ "aes", "cbc", "der 0.7.8", - "pbkdf2", + "pbkdf2 0.12.2", "scrypt", "sha2", "spki 0.7.2", @@ -8831,12 +8949,6 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" -[[package]] -name = "platforms" -version = "3.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4503fa043bf02cee09a9582e9554b4c6403b2ef55e4612e96561d294419429f8" - [[package]] name = "plotters" version = "0.3.5" @@ -9883,11 +9995,12 @@ dependencies = [ [[package]] name = "reqsign" version = "0.14.9" -source = "git+https://github.com/wcy-fdu/reqsign.git?rev=c7dd668#c7dd668764ada1e7477177cfa913fec24252dd34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43e319d9de9ff4d941abf4ac718897118b0fe04577ea3f8e0f5788971784eef5" dependencies = [ "anyhow", "async-trait", - "base64 0.22.0", + "base64 0.21.7", "chrono", "form_urlencoded", "getrandom", @@ -9903,7 +10016,38 @@ dependencies = [ "rand", "reqwest 0.11.20", "rsa", - "rust-ini", + "rust-ini 0.20.0", + "serde", + "serde_json", + "sha1", + "sha2", +] + +[[package]] +name = "reqsign" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70fe66d4cd0b5ed9b1abbfe639bf6baeaaf509f7da2d51b31111ba945be59286" +dependencies = [ + "anyhow", + "async-trait", + "base64 0.22.0", + "chrono", + "form_urlencoded", + "getrandom", + "hex", + "hmac", + "home", + "http 1.1.0", + "jsonwebtoken", + "log", + "once_cell", + "percent-encoding", + "quick-xml 0.31.0", + "rand", + "reqwest 0.12.4", + "rsa", + "rust-ini 0.21.0", "serde", "serde_json", "sha1", @@ -9969,7 +10113,7 @@ dependencies = [ "futures-core", "futures-util", "h2 0.4.4", - "http 1.0.0", + "http 1.1.0", "http-body 1.0.0", "http-body-util", "hyper 1.1.0", @@ -10016,7 +10160,7 @@ checksum = "a45d100244a467870f6cb763c4484d010a6bed6bd610b3676e3825d93fb4cfbd" dependencies = [ "anyhow", 
"async-trait", - "http 1.0.0", + "http 1.1.0", "reqwest 0.12.4", "serde", "thiserror", @@ -10118,7 +10262,7 @@ dependencies = [ "reqwest 0.12.4", "serde", "serde_json", - "serde_with", + "serde_with 3.8.0", "serde_yaml", "tempfile", "thiserror-ext", @@ -10199,7 +10343,7 @@ dependencies = [ "madsim-tokio", "madsim-tonic", "memcomparable", - "opendal", + "opendal 0.45.1", "parking_lot 0.12.1", "paste", "prometheus", @@ -10412,7 +10556,7 @@ dependencies = [ "serde_bytes", "serde_default", "serde_json", - "serde_with", + "serde_with 3.8.0", "smallbitset", "speedate", "stacker", @@ -10680,7 +10824,7 @@ dependencies = [ "google-cloud-pubsub", "http 0.2.9", "icelake", - "indexmap 1.9.3", + "indexmap 2.2.6", "itertools 0.12.1", "jni", "jsonbb", @@ -10691,11 +10835,12 @@ dependencies = [ "madsim-tonic", "maplit", "moka", + "mongodb", "mysql_async", "mysql_common", "nexmark", "num-bigint", - "opendal", + "opendal 0.45.1", "openssl", "parking_lot 0.12.1", "paste", @@ -10732,7 +10877,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "serde_with", + "serde_with 3.8.0", "serde_yaml", "simd-json", "sqlx", @@ -11245,6 +11390,7 @@ dependencies = [ "enum-as-inner 0.6.0", "expect-test", "fail", + "flate2", "function_name", "futures", "hex", @@ -11424,9 +11570,9 @@ dependencies = [ "madsim", "madsim-aws-sdk-s3", "madsim-tokio", - "opendal", + "opendal 0.47.0", "prometheus", - "reqwest 0.11.20", + "reqwest 0.12.4", "risingwave_common", "rustls 0.23.5", "spin 0.9.8", @@ -11470,7 +11616,7 @@ dependencies = [ "risingwave_frontend", "risingwave_sqlparser", "serde", - "serde_with", + "serde_with 3.8.0", "serde_yaml", "tempfile", "thiserror-ext", @@ -11618,7 +11764,7 @@ dependencies = [ "madsim-tokio", "matches", "serde", - "serde_with", + "serde_with 3.8.0", "serde_yaml", "thiserror", "tracing", @@ -11668,7 +11814,7 @@ dependencies = [ "regex", "risingwave_rt", "serde", - "serde_with", + "serde_with 3.8.0", "tokio-postgres", "tokio-stream", "toml 0.8.12", @@ -12000,6 +12146,17 @@ dependencies = [ "ordered-multimap", ] +[[package]] +name = "rust-ini" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d625ed57d8f49af6cfa514c42e1a71fadcff60eb0b1c517ff82fe41aa025b41" +dependencies = [ + "cfg-if", + "ordered-multimap", + "trim-in-place", +] + [[package]] name = "rust_decimal" version = "1.35.0" @@ -12047,6 +12204,16 @@ dependencies = [ "semver 1.0.18", ] +[[package]] +name = "rustc_version_runtime" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d31b7153270ebf48bf91c65ae5b0c00e749c4cfad505f66530ac74950249582f" +dependencies = [ + "rustc_version 0.2.3", + "semver 0.9.0", +] + [[package]] name = "rustix" version = "0.36.16" @@ -12399,7 +12566,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" dependencies = [ - "pbkdf2", + "pbkdf2 0.12.2", "salsa20", "sha2", ] @@ -12853,6 +13020,16 @@ dependencies = [ "v8", ] +[[package]] +name = "serde_with" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff" +dependencies = [ + "serde", + "serde_with_macros 1.5.2", +] + [[package]] name = "serde_with" version = "3.8.0" @@ -12863,14 +13040,26 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.0.0", + "indexmap 2.2.6", "serde", "serde_derive", "serde_json", - "serde_with_macros", + 
"serde_with_macros 3.8.0", "time", ] +[[package]] +name = "serde_with_macros" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" +dependencies = [ + "darling 0.13.4", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "serde_with_macros" version = "3.8.0" @@ -12889,7 +13078,7 @@ version = "0.9.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" dependencies = [ - "indexmap 2.0.0", + "indexmap 2.2.6", "itoa", "ryu", "serde", @@ -13445,7 +13634,7 @@ dependencies = [ "futures-util", "hashlink", "hex", - "indexmap 2.0.0", + "indexmap 2.2.6", "log", "memchr", "native-tls", @@ -13841,7 +14030,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce837c5eae1cb200a310940de989fd9b3d12ed62d7752bc69b39ef8aa775ec04" dependencies = [ "anyhow", - "indexmap 2.0.0", + "indexmap 2.2.6", "serde", "serde_json", "swc_cached", @@ -13953,7 +14142,7 @@ checksum = "66539401f619730b26d380a120b91b499f80cbdd9bb15d00aa73bc3a4d4cc394" dependencies = [ "better_scoped_tls", "bitflags 2.5.0", - "indexmap 2.0.0", + "indexmap 2.2.6", "once_cell", "phf", "rustc-hash", @@ -14022,7 +14211,7 @@ checksum = "f0ec75c1194365abe4d44d94e58f918ec853469ecd39733b381a089cfdcdee1a" dependencies = [ "base64 0.21.7", "dashmap", - "indexmap 2.0.0", + "indexmap 2.2.6", "once_cell", "serde", "sha-1", @@ -14061,7 +14250,7 @@ version = "0.127.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14482e455df85486d68a51533a31645d511e56df93a35cadf0eabbe7abe96b98" dependencies = [ - "indexmap 2.0.0", + "indexmap 2.2.6", "num_cpus", "once_cell", "rustc-hash", @@ -14274,6 +14463,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" +[[package]] +name = "take_mut" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" + [[package]] name = "tap" version = "1.0.1" @@ -14758,7 +14953,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.0.0", + "indexmap 2.2.6", "serde", "serde_spanned", "toml_datetime", @@ -14771,7 +14966,7 @@ version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" dependencies = [ - "indexmap 2.0.0", + "indexmap 2.2.6", "toml_datetime", "winnow 0.5.15", ] @@ -14782,7 +14977,7 @@ version = "0.22.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e40bb779c5187258fd7aad0eb68cb8706a0a81fa712fbea808ab43c4b8374c4" dependencies = [ - "indexmap 2.0.0", + "indexmap 2.2.6", "serde", "serde_spanned", "toml_datetime", @@ -14893,7 +15088,7 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http 1.0.0", + "http 1.1.0", "http-body 1.0.0", "http-body-util", "http-range-header", @@ -15062,6 +15257,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "trim-in-place" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343e926fc669bc8cde4fa3129ab681c63671bae288b1f1081ceee6d9d37904fc" + [[package]] name = "triomphe" 
version = "0.1.11" @@ -15078,6 +15279,31 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "622b09ce2fe2df4618636fb92176d205662f59803f39e70d1c333393082de96c" +[[package]] +name = "trust-dns-proto" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c31f240f59877c3d4bb3b3ea0ec5a6a0cff07323580ff8c7a605cd7d08b255d" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner 0.4.0", + "futures-channel", + "futures-io", + "futures-util", + "idna 0.2.3", + "ipnet", + "lazy_static", + "log", + "rand", + "smallvec", + "thiserror", + "tinyvec", + "tokio", + "url", +] + [[package]] name = "trust-dns-proto" version = "0.22.0" @@ -15129,6 +15355,26 @@ dependencies = [ "url", ] +[[package]] +name = "trust-dns-resolver" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4ba72c2ea84515690c9fcef4c6c660bb9df3036ed1051686de84605b74fd558" +dependencies = [ + "cfg-if", + "futures-util", + "ipconfig", + "lazy_static", + "log", + "lru-cache", + "parking_lot 0.12.1", + "resolv-conf", + "smallvec", + "thiserror", + "tokio", + "trust-dns-proto 0.21.2", +] + [[package]] name = "trust-dns-resolver" version = "0.22.0" @@ -15205,6 +15451,17 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" +[[package]] +name = "typed-builder" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89851716b67b937e393b3daa8423e67ddfc4bbbf1654bcf05488e95e0828db0c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "typed-builder" version = "0.16.2" @@ -15719,7 +15976,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6998515d3cf3f8b980ef7c11b29a9b1017d4cf86b99ae93b546992df9931413" dependencies = [ "bitflags 2.5.0", - "indexmap 2.0.0", + "indexmap 2.2.6", "semver 1.0.18", ] @@ -15748,7 +16005,7 @@ dependencies = [ "encoding_rs", "fxprof-processed-profile", "gimli", - "indexmap 2.0.0", + "indexmap 2.2.6", "ittapi", "libc", "log", @@ -15864,7 +16121,7 @@ dependencies = [ "cpp_demangle", "cranelift-entity", "gimli", - "indexmap 2.0.0", + "indexmap 2.2.6", "log", "object 0.33.0", "rustc-demangle", @@ -15927,7 +16184,7 @@ dependencies = [ "cc", "cfg-if", "encoding_rs", - "indexmap 2.0.0", + "indexmap 2.2.6", "libc", "log", "mach2", @@ -16002,7 +16259,7 @@ checksum = "595bc7bb3b0ff4aa00fab718c323ea552c3034d77abc821a35112552f2ea487a" dependencies = [ "anyhow", "heck 0.4.1", - "indexmap 2.0.0", + "indexmap 2.2.6", "wit-parser", ] @@ -16521,7 +16778,7 @@ checksum = "744237b488352f4f27bca05a10acb79474415951c450e52ebd0da784c1df2bcc" dependencies = [ "anyhow", "id-arena", - "indexmap 2.0.0", + "indexmap 2.2.6", "log", "semver 1.0.18", "serde", diff --git a/Cargo.toml b/Cargo.toml index 7e7910234bd7b..457e260cb3b1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -319,8 +319,6 @@ tokio-postgres = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = futures-timer = { git = "https://github.com/madsim-rs/futures-timer.git", rev = "05b33b4" } # patch: unlimit 4MB message size for grpc client etcd-client = { git = "https://github.com/risingwavelabs/etcd-client.git", rev = "4e84d40" } -# todo(wcy-fdu): remove this patch fork after opendal release a new version to apply azure workload identity change. 
-reqsign = { git = "https://github.com/wcy-fdu/reqsign.git", rev = "c7dd668" } # patch to remove preserve_order from serde_json deno_core = { git = "https://github.com/bakjos/deno_core", rev = "9b241c6" } # patch to user reqwest 0.12.2 @@ -330,6 +328,8 @@ deno_net = { git = "https://github.com/bakjos/deno", rev = "787a232" } deno_tls = { git = "https://github.com/bakjos/deno", rev = "787a232" } deno_web = { git = "https://github.com/bakjos/deno", rev = "787a232" } deno_websocket = { git = "https://github.com/bakjos/deno", rev = "787a232" } +# patch to remove preserve_order from serde_json +bson = { git = "https://github.com/risingwavelabs/bson-rust", rev = "e5175ec" } [workspace.metadata.dylint] libraries = [{ path = "./lints" }] diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index b2a885a4ba2e6..bbd9a3c1dabc3 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -99,6 +99,8 @@ services: - doris-server - starrocks-fe-server - starrocks-be-server + - mongodb + - mongodb-setup - sqlserver-server volumes: - ..:/risingwave diff --git a/ci/scripts/e2e-mongodb-sink-test.sh b/ci/scripts/e2e-mongodb-sink-test.sh new file mode 100755 index 0000000000000..6ec6e97cf0fe1 --- /dev/null +++ b/ci/scripts/e2e-mongodb-sink-test.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + +# install the mongo shell +wget http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb +wget https://repo.mongodb.org/apt/ubuntu/dists/focal/mongodb-org/4.4/multiverse/binary-amd64/mongodb-org-shell_4.4.28_amd64.deb +dpkg -i libssl1.1_1.1.1f-1ubuntu2_amd64.deb +dpkg -i mongodb-org-shell_4.4.28_amd64.deb + +echo '> ping mongodb' +echo 'db.runCommand({ping: 1})' | mongo mongodb://mongodb:27017 +echo '> rs config' +echo 'rs.conf()' | mongo mongodb://mongodb:27017 +echo '> run mongodb sink test..' + +sqllogictest -p 4566 -d dev './e2e_test/sink/mongodb_sink.slt' +sleep 1 + +append_only_result=$(mongo mongodb://mongodb:27017 --eval 'db.getSiblingDB("demo").t1.countDocuments({})' | tail -n 1) +if [ "$append_only_result" != "1" ]; then + echo "The append-only output is not as expected." + exit 1 +fi + +upsert_and_dynamic_coll_result1=$(mongo mongodb://mongodb:27017 --eval 'db.getSiblingDB("demo").t2.countDocuments({})' | tail -n 1) +if [ "$upsert_and_dynamic_coll_result1" != "1" ]; then + echo "The upsert output is not as expected." + exit 1 +fi + +upsert_and_dynamic_coll_result2=$(mongo mongodb://mongodb:27017 --eval 'db.getSiblingDB("shard_2024_01").tenant_1.countDocuments({})' | tail -n 1) +if [ "$upsert_and_dynamic_coll_result2" != "1" ]; then + echo "The upsert output is not as expected." + exit 1 +fi + +compound_pk_result=$(mongo mongodb://mongodb:27017 --eval 'db.getSiblingDB("demo").t3.countDocuments({})' | tail -n 1) +if [ "$compound_pk_result" != "1" ]; then + echo "The upsert output is not as expected." 
+ exit 1 +fi + +echo "Mongodb sink check passed" + +echo "--- Kill cluster" +risedev ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-source-test.sh b/ci/scripts/e2e-source-test.sh index 84fff651b547c..c4b4713af81cc 100755 --- a/ci/scripts/e2e-source-test.sh +++ b/ci/scripts/e2e-source-test.sh @@ -33,6 +33,7 @@ tar xf ./risingwave-connector.tar.gz -C ./connector-node echo "--- Install dependencies" python3 -m pip install --break-system-packages requests protobuf fastavro confluent_kafka jsonschema +apt-get -y install jq echo "--- e2e, inline test" RUST_LOG="debug,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \ @@ -134,21 +135,7 @@ risedev slt './e2e_test/source/cdc/cdc_share_stream_drop.slt' echo "--- Kill cluster" risedev ci-kill - -echo "--- e2e, ci-1cn-1fe, protobuf schema registry" export RISINGWAVE_CI=true -RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \ -risedev ci-start ci-1cn-1fe -python3 e2e_test/schema_registry/pb.py "message_queue:29092" "http://schemaregistry:8082" "sr_pb_test" 20 user -echo "make sure google/protobuf/source_context.proto is NOT in schema registry" -curl --silent 'http://schemaregistry:8082/subjects'; echo -# curl --silent --head -X GET 'http://schemaregistry:8082/subjects/google%2Fprotobuf%2Fsource_context.proto/versions' | grep 404 -curl --silent 'http://schemaregistry:8082/subjects' | grep -v 'google/protobuf/source_context.proto' -risedev slt './e2e_test/schema_registry/pb.slt' -risedev slt './e2e_test/schema_registry/alter_sr.slt' - -echo "--- Kill cluster" -risedev ci-kill echo "--- e2e, ci-kafka-plus-pubsub, kafka and pubsub source" RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \ diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml index c9eaf5cf0c38d..13c98c6bff9f8 100644 --- a/ci/workflows/main-cron.yml +++ b/ci/workflows/main-cron.yml @@ -872,7 +872,7 @@ steps: key: "e2e-sqlserver-sink-tests" command: "ci/scripts/e2e-sqlserver-sink-test.sh -p ci-release" if: | - !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + !(build.pull_request.labels includes "ci/main-cron/run-selected") && build.env("CI_STEPS") == null || build.pull_request.labels includes "ci/run-e2e-sqlserver-sink-tests" || build.env("CI_STEPS") =~ /(^|,)e2e-sqlserver-sink-tests?(,|$$)/ depends_on: @@ -925,6 +925,25 @@ steps: timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end mongodb sink test" + key: "e2e-mongodb-sink-tests" + command: "ci/scripts/e2e-mongodb-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-mongodb-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-mongodb-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v5.1.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + - label: "connector node integration test Java {{matrix.java_version}}" key: "connector-node-integration-test" command: "ci/scripts/connector-node-integration-test.sh -p ci-release -v {{matrix.java_version}}" diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml index cc5e670fe078a..ae8db23bbeb81 100644 --- a/ci/workflows/pull-request.yml +++ b/ci/workflows/pull-request.yml @@ -423,6 +423,21 @@ steps: 
timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end mongodb sink test" + if: build.pull_request.labels includes "ci/run-e2e-mongodb-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-mongodb-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-mongodb-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v5.1.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + - label: "e2e java-binding test" if: build.pull_request.labels includes "ci/run-java-binding-tests" || build.env("CI_STEPS") =~ /(^|,)java-binding-tests?(,|$$)/ command: "ci/scripts/java-binding-test.sh -p ci-dev" diff --git a/dashboard/package-lock.json b/dashboard/package-lock.json index 857888e4d4cfe..c06e209600477 100644 --- a/dashboard/package-lock.json +++ b/dashboard/package-lock.json @@ -11513,16 +11513,16 @@ "devOptional": true }, "node_modules/ws": { - "version": "8.8.1", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.8.1.tgz", - "integrity": "sha512-bGy2JzvzkPowEJV++hF07hAD6niYSr0JzBNo/J29WsB57A2r7Wlc1UFcTR9IzrPvuNVO4B8LGqF8qcpsVOhJCA==", + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz", + "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==", "optional": true, "engines": { "node": ">=10.0.0" }, "peerDependencies": { "bufferutil": "^4.0.1", - "utf-8-validate": "^5.0.2" + "utf-8-validate": ">=5.0.2" }, "peerDependenciesMeta": { "bufferutil": { @@ -20080,9 +20080,9 @@ "devOptional": true }, "ws": { - "version": "8.8.1", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.8.1.tgz", - "integrity": "sha512-bGy2JzvzkPowEJV++hF07hAD6niYSr0JzBNo/J29WsB57A2r7Wlc1UFcTR9IzrPvuNVO4B8LGqF8qcpsVOhJCA==", + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz", + "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==", "optional": true, "requires": {} }, diff --git a/e2e_test/schema_registry/alter_sr.slt b/e2e_test/schema_registry/alter_sr.slt deleted file mode 100644 index d703c0401a35e..0000000000000 --- a/e2e_test/schema_registry/alter_sr.slt +++ /dev/null @@ -1,80 +0,0 @@ -# Before running this test, seed data into kafka: -# python3 e2e_test/schema_registry/pb.py - -statement ok -CREATE SOURCE src_user WITH ( - connector = 'kafka', - topic = 'sr_pb_test', - properties.bootstrap.server = 'message_queue:29092', - scan.startup.mode = 'earliest' -) -FORMAT PLAIN ENCODE PROTOBUF( - schema.registry = 'http://schemaregistry:8082', - message = 'test.User' -); - -statement ok -CREATE MATERIALIZED VIEW mv_user AS SELECT * FROM src_user; - -statement ok -CREATE TABLE t_user WITH ( - connector = 'kafka', - topic = 'sr_pb_test', - properties.bootstrap.server = 'message_queue:29092', - scan.startup.mode = 'earliest' -) -FORMAT PLAIN ENCODE PROTOBUF( - schema.registry = 'http://schemaregistry:8082', - message = 'test.User' -); - -statement error -SELECT age FROM mv_user; - -statement error -SELECT age FROM t_user; - -# Push more events with extended fields -system ok -python3 e2e_test/schema_registry/pb.py "message_queue:29092" "http://schemaregistry:8082" "sr_pb_test" 5 user_with_more_fields - -sleep 5s - -# Refresh source schema -statement ok -ALTER SOURCE src_user REFRESH SCHEMA; - -statement ok -CREATE MATERIALIZED VIEW mv_user_more AS SELECT * FROM src_user; - -# Refresh table schema -statement ok 
-ALTER TABLE t_user REFRESH SCHEMA; -query IIII -SELECT COUNT(*), MAX(age), MIN(age), SUM(age) FROM mv_user_more; ----- -25 4 0 10 - -# Push more events with extended fields -system ok -python3 e2e_test/schema_registry/pb.py "message_queue:29092" "http://schemaregistry:8082" "sr_pb_test" 5 user_with_more_fields - -sleep 5s - -query IIII -SELECT COUNT(*), MAX(age), MIN(age), SUM(age) FROM t_user; ----- -30 4 0 10 - -statement ok -DROP MATERIALIZED VIEW mv_user_more; - -statement ok -DROP TABLE t_user; - -statement ok -DROP MATERIALIZED VIEW mv_user; - -statement ok -DROP SOURCE src_user; diff --git a/e2e_test/schema_registry/pb.slt b/e2e_test/schema_registry/pb.slt deleted file mode 100644 index 7b60b4fa8d7a4..0000000000000 --- a/e2e_test/schema_registry/pb.slt +++ /dev/null @@ -1,50 +0,0 @@ -# Before running this test, seed data into kafka: -# python3 e2e_test/schema_registry/pb.py - -# Create a table. -statement ok -create table sr_pb_test with ( - connector = 'kafka', - topic = 'sr_pb_test', - properties.bootstrap.server = 'message_queue:29092', - scan.startup.mode = 'earliest') -FORMAT plain ENCODE protobuf( - schema.registry = 'http://schemaregistry:8082', - message = 'test.User' - ); - -# for multiple schema registry nodes -statement ok -create table sr_pb_test_bk with ( - connector = 'kafka', - topic = 'sr_pb_test', - properties.bootstrap.server = 'message_queue:29092', - scan.startup.mode = 'earliest') -FORMAT plain ENCODE protobuf( - schema.registry = 'http://schemaregistry:8082,http://schemaregistry:8082', - message = 'test.User' - ); - -# Wait for source -sleep 10s - -# Flush into storage -statement ok -flush; - -query I -select count(*) from sr_pb_test; ----- -20 - -query IIT -select min(id), max(id), max((sc).file_name) from sr_pb_test; ----- -0 19 source/context_019.proto - - -statement ok -drop table sr_pb_test; - -statement ok -drop table sr_pb_test_bk; diff --git a/e2e_test/sink/mongodb_sink.slt b/e2e_test/sink/mongodb_sink.slt new file mode 100644 index 0000000000000..2122993e3003a --- /dev/null +++ b/e2e_test/sink/mongodb_sink.slt @@ -0,0 +1,106 @@ +statement ok +create table t1( + a smallint, + b int, + c bigint, + d rw_int256, + e real, + f double precision, + g varchar, + h bytea, + i date, + j time, + k timestamp, + l timestamptz, + m interval, + n STRUCT<a STRUCT<b INTEGER>, d INTEGER>, + o varchar[], + p jsonb +) append only; + +statement ok +create sink t1_sink from t1 +with ( + connector='mongodb', + type = 'append-only', + mongodb.url = 'mongodb://mongodb:27017/?replicaSet=rs0', + collection.name = 'demo.t1', + mongodb.bulk_write.max_entries = '1024' +); + +statement ok +insert into t1 values(1, 2, 3, 4, 5.0, 6.0, '7', '\xDe00BeEf', date '2022-04-08', time '18:20:49', + '2022-03-13 01:00:00'::timestamp, '2022-03-13 01:00:00Z'::timestamptz, interval '4 hour', + ROW(ROW(8), 9), ARRAY['a', 'b', 'c'], '{"a": [{"b": 1}], "c": true}'::jsonb); + +statement ok +create table t2( + _id bigint primary key, + collection_name varchar, + value varchar +); + +statement ok +create sink t2_sink from t2 +with ( + connector='mongodb', + type = 'upsert', + mongodb.url = 'mongodb://mongodb:27017/?replicaSet=rs0', + collection.name = 'demo.t2', + mongodb.bulk_write.max_entries = '1024', + collection.name.field = 'collection_name', + collection.name.field.drop = 'true', + primary_key='_id' +); + +statement ok +insert into t2 values(1, 'shard_2024_01.tenant_1', 'data'); + +statement ok +insert into t2 values(2, '', 'data'); + +statement ok +create table t3( + a int, + b int, + value text, + primary key
(a,b) +); + +statement ok +create sink t3_sink from t3 +with ( + connector='mongodb', + type = 'upsert', + mongodb.url = 'mongodb://mongodb:27017/?replicaSet=rs0', + collection.name = 'demo.t3', + mongodb.bulk_write.max_entries = '1024', + primary_key='a,b' +); + +statement ok +delete from t3 where a = 1 and b = 2; + +statement ok +insert into t3 values(1, 2, 'abc'); + +statement ok +FLUSH; + +statement ok +DROP SINK t1_sink; + +statement ok +DROP TABLE t1; + +statement ok +DROP SINK t2_sink; + +statement ok +DROP TABLE t2; + +statement ok +DROP SINK t3_sink; + +statement ok +DROP TABLE t3; \ No newline at end of file diff --git a/e2e_test/source/cdc/mysql_cdc.sql b/e2e_test/source/cdc/mysql_cdc.sql index 2c53e57748163..95c7c53ae1ef0 100644 --- a/e2e_test/source/cdc/mysql_cdc.sql +++ b/e2e_test/source/cdc/mysql_cdc.sql @@ -19,15 +19,15 @@ VALUES (default,"scooter","Small 2-wheel scooter"), (default,"hammer","14oz carpenter's hammer"), (default,"hammer","16oz carpenter's hammer"), (default,"rocks","box of assorted rocks"), - (default,"jacket","water resistent black wind breaker"), + (default,"jacket","water resistant black wind breaker"), (default,"spare tire","24 inch spare tire"); CREATE TABLE orders ( order_id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, order_date DATETIME NOT NULL, - customer_name VARCHAR(255) NOT NULL, - price DECIMAL(10, 5) NOT NULL, + `cusTomer_Name` VARCHAR(255) NOT NULL, + `priCE` DECIMAL(10, 5) NOT NULL, product_id INTEGER NOT NULL, order_status BOOLEAN NOT NULL -- Whether order has been placed ) AUTO_INCREMENT = 10001; diff --git a/e2e_test/source_inline/commands.toml b/e2e_test/source_inline/commands.toml index 48342bceafd42..57d09d8237efa 100644 --- a/e2e_test/source_inline/commands.toml +++ b/e2e_test/source_inline/commands.toml @@ -37,6 +37,12 @@ set -e if [ -n "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" ]; then echo "Deleting all Kafka topics..." rpk topic delete -r "*" + echo "Deleting all schema registry subjects" + rpk sr subject list | while read -r subject; do + echo "Deleting schema registry subject: $subject" + rpk sr subject delete "$subject" + rpk sr subject delete "$subject" --permanent + done else echo "No Kafka to clean." 
fi diff --git a/e2e_test/source_inline/kafka/avro/alter_source.slt b/e2e_test/source_inline/kafka/avro/alter_source.slt new file mode 100644 index 0000000000000..e60bf5c0295b0 --- /dev/null +++ b/e2e_test/source_inline/kafka/avro/alter_source.slt @@ -0,0 +1,70 @@ +control substitution on + +# https://github.com/risingwavelabs/risingwave/issues/16486 + +# cleanup +system ok +rpk topic delete 'avro_alter_source_test' || true; \\ +(rpk sr subject delete 'avro_alter_source_test-value' && rpk sr subject delete 'avro_alter_source_test-value' --permanent) || true; + +# create topic and sr subject +system ok +rpk topic create 'avro_alter_source_test' + +system ok +echo '{"type":"record","name":"Root","fields":[{"name":"foo","type":"string"}]}' | jq '{"schema": tojson}' \\ +| curl -X POST -H 'content-type:application/json' -d @- '${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/avro_alter_source_test-value/versions' + +statement ok +create source s +WITH ( + ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON}, + topic = 'avro_alter_source_test' +) +FORMAT PLAIN ENCODE AVRO ( + schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}' +); + +# create a new version of schema and produce a message +system ok +echo '{"type":"record","name":"Root","fields":[{"name":"bar","type":"int","default":0},{"name":"foo","type":"string"}]}' | jq '{"schema": tojson}' \\ +| curl -X POST -H 'content-type:application/json' -d @- '${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/avro_alter_source_test-value/versions' + +system ok +echo '{"foo":"ABC", "bar":1}' | rpk topic produce --schema-id=topic avro_alter_source_test + +query ? +select * from s +---- +ABC + +statement error +alter source s format plain encode json; +---- +db error: ERROR: Failed to run the query + +Caused by: + Feature is not yet implemented: the original definition is FORMAT Plain ENCODE Avro, and altering them is not supported yet +No tracking issue yet. Feel free to submit a feature request at https://github.com/risingwavelabs/risingwave/issues/new?labels=type%2Ffeature&template=feature_request.yml + + +statement ok +alter source s format plain encode avro (schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'); + +query ?? +select * from s +---- +ABC 1 + +statement ok +create materialized view mv as select * from s; + +sleep 2s + +query ?? 
+select * from mv +---- +ABC 1 + +statement ok +drop source s cascade; diff --git a/e2e_test/source_inline/kafka/avro/name_strategy.slt b/e2e_test/source_inline/kafka/avro/name_strategy.slt index 737b97316cc9a..09bd171bafe37 100644 --- a/e2e_test/source_inline/kafka/avro/name_strategy.slt +++ b/e2e_test/source_inline/kafka/avro/name_strategy.slt @@ -126,12 +126,12 @@ FROM ORDER BY "ID"; ---- -update id1 -1 6768 6970 value9 7172 info9 2021-05-18T07:59:58.714Z -21474836.47 NULL NULL NULL -delete id2 2 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 99999999.99 NULL NULL NULL -delete id3 3 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.47 NULL NULL NULL -delete id5 5 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.49 NULL NULL NULL -NULL id6 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL (abcdefg) NULL -NULL id7 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL NULL 67e55044-10b1-426f-9247-bb680e5fe0c8 +update id1 -1 6768 6970 value9 7172 info9 2021-05-18T07:59:58.714Z -21474836.47 NULL NULL NULL NaN +delete id2 2 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 99999999.99 NULL NULL NULL NaN +delete id3 3 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.47 NULL NULL NULL NaN +delete id5 5 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.49 NULL NULL NULL NaN +NULL id6 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL (abcdefg) NULL NaN +NULL id7 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL NULL 67e55044-10b1-426f-9247-bb680e5fe0c8 NaN query II @@ -142,15 +142,15 @@ FROM ORDER BY "ID"; ---- -update id1 1 6768 6970 value9 7172 info9 2021-05-18T07:59:58.714Z 99999999.99 NULL NULL NULL -update id1 -1 6768 6970 value9 7172 info9 2021-05-18T07:59:58.714Z -21474836.47 NULL NULL NULL -delete id2 2 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 99999999.99 NULL NULL NULL -delete id3 3 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.47 NULL NULL NULL -delete id4 4 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.49 NULL NULL NULL -delete id5 5 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.49 NULL NULL NULL -NULL id6 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL (abcdefg) NULL -NULL id7 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL NULL 67e55044-10b1-426f-9247-bb680e5fe0c8 -NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +update id1 1 6768 6970 value9 7172 info9 2021-05-18T07:59:58.714Z 99999999.99 NULL NULL NULL NaN +update id1 -1 6768 6970 value9 7172 info9 2021-05-18T07:59:58.714Z -21474836.47 NULL NULL NULL NaN +delete id2 2 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 99999999.99 NULL NULL NULL NaN +delete id3 3 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.47 NULL NULL NULL NaN +delete id4 4 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.49 NULL NULL NULL NaN +delete id5 5 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.49 NULL NULL NULL NaN +NULL id6 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL (abcdefg) NULL NaN +NULL id7 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL NULL 67e55044-10b1-426f-9247-bb680e5fe0c8 NaN +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL query II @@ -161,12 +161,12 @@ FROM ORDER BY "ID"; ---- -update id1 -1 6768 6970 value9 7172 info9 2021-05-18T07:59:58.714Z -21474836.47 NULL NULL NULL -delete id2 2 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 99999999.99 NULL NULL NULL -delete id3 3 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 
21474836.47 NULL NULL NULL -delete id5 5 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.49 NULL NULL NULL -NULL id6 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL (abcdefg) NULL -NULL id7 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL NULL 67e55044-10b1-426f-9247-bb680e5fe0c8 +update id1 -1 6768 6970 value9 7172 info9 2021-05-18T07:59:58.714Z -21474836.47 NULL NULL NULL NaN +delete id2 2 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 99999999.99 NULL NULL NULL NaN +delete id3 3 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.47 NULL NULL NULL NaN +delete id5 5 7778 7980 value10 8182 info10 2021-05-19T15:22:45.539Z 21474836.49 NULL NULL NULL NaN +NULL id6 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL (abcdefg) NULL NaN +NULL id7 NULL NULL NULL NULL NULL NULL NULL -0.01 NULL NULL 67e55044-10b1-426f-9247-bb680e5fe0c8 NaN diff --git a/e2e_test/source_inline/kafka/avro/upsert_avro_json b/e2e_test/source_inline/kafka/avro/upsert_avro_json index 426b27c41fb98..ec1fc3fd008a5 100644 --- a/e2e_test/source_inline/kafka/avro/upsert_avro_json +++ b/e2e_test/source_inline/kafka/avro/upsert_avro_json @@ -1,4 +1,4 @@ -"string"^{"type":"record","name":"OBJ_ATTRIBUTE_VALUE","namespace":"CPLM","fields":[{"name":"op_type","type":["null","string"],"default":null},{"name":"ID","type":["null","string"],"default":null},{"name":"CLASS_ID","type":["null","string"],"default":null},{"name":"ITEM_ID","type":["null","string"],"default":null},{"name":"ATTR_ID","type":["null","string"],"default":null},{"name":"ATTR_VALUE","type":["null","string"],"default":null},{"name":"ORG_ID","type":["null","string"],"default":null},{"name":"UNIT_INFO","type":["null","string"],"default":null},{"name":"UPD_TIME","type":["null","string"],"default":null},{"name":"DEC_VAL","type":[{"type":"bytes","logicalType":"decimal","precision":10,"scale":2},"null"],"default":"\u00ff"},{"name":"REFERRED","type":["null",{"type":"record","name":"REFERRED_TYPE","fields":[{"name":"a","type":"string"}]}],"default":null},{"name":"REF","type":["null","REFERRED_TYPE"],"default":null},{"name":"uuid","type":["null",{"type":"string","logicalType":"uuid"}],"default":null}],"connect.name":"CPLM.OBJ_ATTRIBUTE_VALUE"} +"string"^{"type":"record","name":"OBJ_ATTRIBUTE_VALUE","namespace":"CPLM","fields":[{"name":"op_type","type":["null","string"],"default":null},{"name":"ID","type":["null","string"],"default":null},{"name":"CLASS_ID","type":["null","string"],"default":null},{"name":"ITEM_ID","type":["null","string"],"default":null},{"name":"ATTR_ID","type":["null","string"],"default":null},{"name":"ATTR_VALUE","type":["null","string"],"default":null},{"name":"ORG_ID","type":["null","string"],"default":null},{"name":"UNIT_INFO","type":["null","string"],"default":null},{"name":"UPD_TIME","type":["null","string"],"default":null},{"name":"DEC_VAL","type":[{"type":"bytes","logicalType":"decimal","precision":10,"scale":2},"null"],"default":"\u00ff"},{"name":"REFERRED","type":["null",{"type":"record","name":"REFERRED_TYPE","fields":[{"name":"a","type":"string"}]}],"default":null},{"name":"REF","type":["null","REFERRED_TYPE"],"default":null},{"name":"uuid","type":["null",{"type":"string","logicalType":"uuid"}],"default":null},{"name":"rate","type":"double","default":"NaN"}],"connect.name":"CPLM.OBJ_ATTRIBUTE_VALUE"} "id1"^{"op_type": {"string": "update"}, "ID": {"string": "id1"}, "CLASS_ID": {"string": "1"}, "ITEM_ID": {"string": "6768"}, "ATTR_ID": {"string": "6970"}, "ATTR_VALUE": {"string": "value9"}, "ORG_ID": {"string": "7172"}, 
"UNIT_INFO": {"string": "info9"}, "UPD_TIME": {"string": "2021-05-18T07:59:58.714Z"}, "DEC_VAL": {"bytes": "\u0002\u0054\u000b\u00e3\u00ff"}} "id2"^{"op_type": {"string": "delete"}, "ID": {"string": "id2"}, "CLASS_ID": {"string": "2"}, "ITEM_ID": {"string": "7778"}, "ATTR_ID": {"string": "7980"}, "ATTR_VALUE": {"string": "value10"}, "ORG_ID": {"string": "8182"}, "UNIT_INFO": {"string": "info10"}, "UPD_TIME": {"string": "2021-05-19T15:22:45.539Z"}, "DEC_VAL": {"bytes": "\u0002\u0054\u000b\u00e3\u00ff"}} "id3"^{"op_type": {"string": "delete"}, "ID": {"string": "id3"}, "CLASS_ID": {"string": "3"}, "ITEM_ID": {"string": "7778"}, "ATTR_ID": {"string": "7980"}, "ATTR_VALUE": {"string": "value10"}, "ORG_ID": {"string": "8182"}, "UNIT_INFO": {"string": "info10"}, "UPD_TIME": {"string": "2021-05-19T15:22:45.539Z"}, "DEC_VAL": {"bytes": "\u007f\u00ff\u00ff\u00ff"}} diff --git a/e2e_test/source_inline/kafka/protobuf/alter_source.slt b/e2e_test/source_inline/kafka/protobuf/alter_source.slt new file mode 100644 index 0000000000000..c9db2df3ca4ee --- /dev/null +++ b/e2e_test/source_inline/kafka/protobuf/alter_source.slt @@ -0,0 +1,91 @@ +control substitution on + +system ok +rpk topic delete sr_pb_test || true; \\ +(rpk sr subject delete 'sr_pb_test-value' && rpk sr subject delete 'sr_pb_test-value' --permanent) || true; + +system ok +python3 e2e_test/source_inline/kafka/protobuf/pb.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" "sr_pb_test" 20 user + +statement ok +CREATE SOURCE src_user +INCLUDE timestamp -- include explicitly here to test a bug found in https://github.com/risingwavelabs/risingwave/pull/17293 +WITH ( + ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON}, + topic = 'sr_pb_test', + scan.startup.mode = 'earliest' +) +FORMAT PLAIN ENCODE PROTOBUF( + schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}', + message = 'test.User' +); + +statement ok +CREATE MATERIALIZED VIEW mv_user AS SELECT * FROM src_user; + +statement ok +CREATE TABLE t_user WITH ( + ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON}, + topic = 'sr_pb_test', + scan.startup.mode = 'earliest' +) +FORMAT PLAIN ENCODE PROTOBUF( + schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}', + message = 'test.User' +); + +statement error +SELECT age FROM mv_user; + +statement error +SELECT age FROM t_user; + +# Push more events with extended fields +system ok +python3 e2e_test/source_inline/kafka/protobuf/pb.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" "sr_pb_test" 5 user_with_more_fields + +sleep 5s + +# Refresh source schema +statement ok +ALTER SOURCE src_user REFRESH SCHEMA; + +statement ok +CREATE MATERIALIZED VIEW mv_user_more AS SELECT * FROM src_user; + +# Refresh table schema. It consume new data before refresh, so the new fields are NULLs +statement ok +ALTER TABLE t_user REFRESH SCHEMA; + +query ???? +SELECT COUNT(*), MAX(age), MIN(age), SUM(age) FROM mv_user_more; +---- +25 104 0 510 + +query ???? +SELECT COUNT(*), MAX(age), MIN(age), SUM(age) FROM t_user; +---- +25 NULL NULL NULL + +# Push more events with extended fields +system ok +python3 e2e_test/source_inline/kafka/protobuf/pb.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" "sr_pb_test" 5 user_with_more_fields + +sleep 5s + +query ???? 
+SELECT COUNT(*), MAX(age), MIN(age), SUM(age) FROM t_user; +---- +30 104 100 510 + +statement ok +DROP MATERIALIZED VIEW mv_user_more; + +statement ok +DROP TABLE t_user; + +statement ok +DROP MATERIALIZED VIEW mv_user; + +statement ok +DROP SOURCE src_user; diff --git a/e2e_test/source_inline/kafka/protobuf/basic.slt b/e2e_test/source_inline/kafka/protobuf/basic.slt new file mode 100644 index 0000000000000..82eb61560aa4d --- /dev/null +++ b/e2e_test/source_inline/kafka/protobuf/basic.slt @@ -0,0 +1,58 @@ +control substitution on + +system ok +rpk topic delete sr_pb_test || true; \\ +(rpk sr subject delete 'sr_pb_test-value' && rpk sr subject delete 'sr_pb_test-value' --permanent) || true; + +system ok +python3 e2e_test/source_inline/kafka/protobuf/pb.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" "sr_pb_test" 20 user + +# make sure google/protobuf/source_context.proto is NOT in schema registry +system ok +curl --silent '${RISEDEV_SCHEMA_REGISTRY_URL}' | grep -v 'google/protobuf/source_context.proto' + +# Create a table. +statement ok +create table sr_pb_test with ( + ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON}, + topic = 'sr_pb_test', + scan.startup.mode = 'earliest') +FORMAT plain ENCODE protobuf( + schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}', + message = 'test.User' + ); + +# for multiple schema registry nodes +statement ok +create table sr_pb_test_bk with ( + ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON}, + topic = 'sr_pb_test', + scan.startup.mode = 'earliest') +FORMAT plain ENCODE protobuf( + schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL},${RISEDEV_SCHEMA_REGISTRY_URL}', + message = 'test.User' + ); + +# Wait for source +sleep 2s + +# Flush into storage +statement ok +flush; + +query I +select count(*) from sr_pb_test; +---- +20 + +query IT +select min(id), max(id), max((sc).file_name) from sr_pb_test; +---- +0 19 source/context_019.proto + + +statement ok +drop table sr_pb_test; + +statement ok +drop table sr_pb_test_bk; diff --git a/e2e_test/schema_registry/pb.py b/e2e_test/source_inline/kafka/protobuf/pb.py similarity index 73% rename from e2e_test/schema_registry/pb.py rename to e2e_test/source_inline/kafka/protobuf/pb.py index fd6e0dc478b51..4cab50f899e50 100644 --- a/e2e_test/schema_registry/pb.py +++ b/e2e_test/source_inline/kafka/protobuf/pb.py @@ -25,6 +25,7 @@ def get_user(i): sc=SourceContext(file_name="source/context_{:03}.proto".format(i)), ) + + def get_user_with_more_fields(i): return user_pb2.User( id=i, @@ -33,15 +34,18 @@ def get_user_with_more_fields(i): city="City_{}".format(i), gender=user_pb2.MALE if i % 2 == 0 else user_pb2.FEMALE, sc=SourceContext(file_name="source/context_{:03}.proto".format(i)), - age=i, + age=100 + i, ) - -def send_to_kafka(producer_conf, schema_registry_conf, topic, num_records, get_user_fn, pb_message): + +def send_to_kafka( + producer_conf, schema_registry_conf, topic, num_records, get_user_fn, pb_message +): schema_registry_client = SchemaRegistryClient(schema_registry_conf) serializer = ProtobufSerializer( pb_message, schema_registry_client, - {"use.deprecated.format": False, 'skip.known.types': True}, + {"use.deprecated.format": False, "skip.known.types": True}, ) producer = Producer(producer_conf) @@ -60,7 +64,9 @@ def send_to_kafka(producer_conf, schema_registry_conf, topic, num_records, get_u if __name__ == "__main__": if len(sys.argv) < 6: - print("pb.py <broker_list> <schema_registry_url> <topic> <num_records> <pb_message>") + print( + "pb.py <broker_list> <schema_registry_url> <topic> <num_records> <pb_message>" + ) exit(1) broker_list = sys.argv[1]
num_records = int(sys.argv[4]) pb_message = sys.argv[5] - user_pb2 = importlib.import_module(f'protobuf.{pb_message}_pb2') + user_pb2 = importlib.import_module(f"{pb_message}_pb2") all_pb_messages = { - 'user': get_user, - 'user_with_more_fields': get_user_with_more_fields, + "user": get_user, + "user_with_more_fields": get_user_with_more_fields, } - assert pb_message in all_pb_messages, f'pb_message must be one of {list(all_pb_messages.keys())}' + assert ( + pb_message in all_pb_messages + ), f"pb_message must be one of {list(all_pb_messages.keys())}" schema_registry_conf = {"url": schema_registry_url} producer_conf = {"bootstrap.servers": broker_list} try: - send_to_kafka(producer_conf, schema_registry_conf, topic, num_records, all_pb_messages[pb_message], user_pb2.User) + send_to_kafka( + producer_conf, + schema_registry_conf, + topic, + num_records, + all_pb_messages[pb_message], + user_pb2.User, + ) except Exception as e: print("Send Protobuf data to schema registry and kafka failed {}", e) exit(1) diff --git a/e2e_test/schema_registry/protobuf/user.proto b/e2e_test/source_inline/kafka/protobuf/user.proto similarity index 100% rename from e2e_test/schema_registry/protobuf/user.proto rename to e2e_test/source_inline/kafka/protobuf/user.proto diff --git a/e2e_test/schema_registry/protobuf/user_pb2.py b/e2e_test/source_inline/kafka/protobuf/user_pb2.py similarity index 100% rename from e2e_test/schema_registry/protobuf/user_pb2.py rename to e2e_test/source_inline/kafka/protobuf/user_pb2.py diff --git a/e2e_test/schema_registry/protobuf/user_with_more_fields.proto b/e2e_test/source_inline/kafka/protobuf/user_with_more_fields.proto similarity index 100% rename from e2e_test/schema_registry/protobuf/user_with_more_fields.proto rename to e2e_test/source_inline/kafka/protobuf/user_with_more_fields.proto diff --git a/e2e_test/schema_registry/protobuf/user_with_more_fields_pb2.py b/e2e_test/source_inline/kafka/protobuf/user_with_more_fields_pb2.py similarity index 100% rename from e2e_test/schema_registry/protobuf/user_with_more_fields_pb2.py rename to e2e_test/source_inline/kafka/protobuf/user_with_more_fields_pb2.py diff --git a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/SinkUtils.java b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/SinkUtils.java index 679deedebcabf..73f0799e44d1d 100644 --- a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/SinkUtils.java +++ b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/SinkUtils.java @@ -40,6 +40,7 @@ public static SinkFactory getSinkFactory(String sinkName) { case "jdbc": return new JDBCSinkFactory(); case "elasticsearch": + case "opensearch": return new EsSinkFactory(); case "cassandra": return new CassandraFactory(); diff --git a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/MySqlValidator.java b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/MySqlValidator.java index a2f63a28bbd7c..d20a18185a74d 100644 --- a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/MySqlValidator.java +++ b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/MySqlValidator.java @@ -187,17 +187,18 @@ private void validateTableSchema() throws SQLException { stmt.setString(1, dbName); stmt.setString(2, 
tableName); - // Field name in lower case -> data type - var schema = new HashMap<String, String>(); + // Field name in lower case -> data type, because MySQL column names are case-insensitive + // https://dev.mysql.com/doc/refman/5.7/en/identifier-case-sensitivity.html + var upstreamSchema = new HashMap<String, String>(); var pkFields = new HashSet<String>(); var res = stmt.executeQuery(); while (res.next()) { var field = res.getString(1); var dataType = res.getString(2); var key = res.getString(3); - schema.put(field, dataType); + upstreamSchema.put(field.toLowerCase(), dataType); if (key.equalsIgnoreCase("PRI")) { - pkFields.add(field); + pkFields.add(field.toLowerCase()); } } @@ -207,7 +208,7 @@ private void validateTableSchema() throws SQLException { if (e.getKey().startsWith(ValidatorUtils.INTERNAL_COLUMN_PREFIX)) { continue; } - var dataType = schema.get(e.getKey()); + var dataType = upstreamSchema.get(e.getKey().toLowerCase()); if (dataType == null) { throw ValidatorUtils.invalidArgument( "Column '" + e.getKey() + "' not found in the upstream database"); @@ -218,7 +219,7 @@ private void validateTableSchema() throws SQLException { } } - if (!ValidatorUtils.isPrimaryKeyMatch(tableSchema, pkFields)) { + if (!isPrimaryKeyMatch(tableSchema, pkFields)) { throw ValidatorUtils.invalidArgument("Primary key mismatch"); } } @@ -231,6 +232,18 @@ public void close() throws Exception { } } + private boolean isPrimaryKeyMatch(TableSchema sourceSchema, Set<String> pkFields) { + if (sourceSchema.getPrimaryKeys().size() != pkFields.size()) { + return false; + } + for (var colName : sourceSchema.getPrimaryKeys()) { + if (!pkFields.contains(colName.toLowerCase())) { + return false; + } + } + return true; + } + private boolean isDataTypeCompatible(String mysqlDataType, Data.DataType.TypeName typeName) { int val = typeName.getNumber(); switch (mysqlDataType) { diff --git a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/PostgresValidator.java b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/PostgresValidator.java index 31b016146e000..93d4fdee0bcd4 100644 --- a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/PostgresValidator.java +++ b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/PostgresValidator.java @@ -188,7 +188,7 @@ private void validateTableSchema() throws SQLException { pkFields.add(name); } - if (!ValidatorUtils.isPrimaryKeyMatch(tableSchema, pkFields)) { + if (!isPrimaryKeyMatch(tableSchema, pkFields)) { throw ValidatorUtils.invalidArgument("Primary key mismatch"); } } @@ -227,6 +227,19 @@ private void validateTableSchema() throws SQLException { } } + private boolean isPrimaryKeyMatch(TableSchema sourceSchema, Set<String> pkFields) { + if (sourceSchema.getPrimaryKeys().size() != pkFields.size()) { + return false; + } + // Postgres column names are case-sensitive + for (var colName : sourceSchema.getPrimaryKeys()) { + if (!pkFields.contains(colName)) { + return false; + } + } + return true; + } + private void validatePrivileges() throws SQLException { boolean isSuperUser = false; if (this.isAwsRds) { diff --git a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/ValidatorUtils.java b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/ValidatorUtils.java index 5c7d9ea6d4948..20d631a3267c9 100644 ---
a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/ValidatorUtils.java +++ b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/ValidatorUtils.java @@ -14,12 +14,10 @@ package com.risingwave.connector.source.common; -import com.risingwave.connector.api.TableSchema; import com.risingwave.connector.api.source.SourceTypeE; import io.grpc.Status; import java.io.IOException; import java.util.Properties; -import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -72,16 +70,4 @@ public static String getJdbcUrl( throw ValidatorUtils.invalidArgument("Unknown source type: " + sourceType); } } - - public static boolean isPrimaryKeyMatch(TableSchema sourceSchema, Set pkFields) { - if (sourceSchema.getPrimaryKeys().size() != pkFields.size()) { - return false; - } - for (var colName : sourceSchema.getPrimaryKeys()) { - if (!pkFields.contains(colName)) { - return false; - } - } - return true; - } } diff --git a/java/connector-node/risingwave-connector-test/pom.xml b/java/connector-node/risingwave-connector-test/pom.xml index 14b1c7bd65fc0..d3d47b0bc4571 100644 --- a/java/connector-node/risingwave-connector-test/pom.xml +++ b/java/connector-node/risingwave-connector-test/pom.xml @@ -128,13 +128,13 @@ com.fasterxml.jackson.core jackson-databind - ${jackson.version} + 2.13.5 test com.fasterxml.jackson.core jackson-core - ${jackson.version} + 2.13.5 test diff --git a/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/elasticsearch/EsSinkTest.java b/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/elasticsearch/EsSinkTest.java index 509f71ec1e569..d2873fac9d216 100644 --- a/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/elasticsearch/EsSinkTest.java +++ b/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/elasticsearch/EsSinkTest.java @@ -19,6 +19,7 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Lists; +import com.risingwave.connector.ElasticRestHighLevelClientAdapter; import com.risingwave.connector.EsSink; import com.risingwave.connector.EsSinkConfig; import com.risingwave.connector.api.TableSchema; @@ -28,10 +29,10 @@ import com.risingwave.proto.Data.Op; import java.io.IOException; import java.util.Map; +import org.apache.http.HttpHost; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; @@ -52,14 +53,14 @@ static TableSchema getTestTableSchema() { public void testEsSink(ElasticsearchContainer container, String username, String password) throws IOException { - EsSink sink = - new EsSink( - new EsSinkConfig(container.getHttpHostAddress()) - .withIndex("test") - .withDelimiter("$") - .withUsername(username) - .withPassword(password), - getTestTableSchema()); + EsSinkConfig config = + new EsSinkConfig(container.getHttpHostAddress()) + .withIndex("test") + .withDelimiter("$") + .withUsername(username) + .withPassword(password); + config.setConnector("elasticsearch"); + EsSink sink = new EsSink(config, getTestTableSchema()); sink.write( Iterators.forArray( new ArraySinkRow( @@ -74,7 +75,9 @@ public 
void testEsSink(ElasticsearchContainer container, String username, String fail(e.getMessage()); } - RestHighLevelClient client = sink.getClient(); + HttpHost host = HttpHost.create(config.getUrl()); + ElasticRestHighLevelClientAdapter client = + new ElasticRestHighLevelClientAdapter(host, config); SearchRequest searchRequest = new SearchRequest("test"); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(QueryBuilders.matchAllQuery()); diff --git a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraUtil.java b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraUtil.java index a6be8f7fc89c1..8327893f6da9a 100644 --- a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraUtil.java +++ b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraUtil.java @@ -79,9 +79,10 @@ private static int getCorrespondingCassandraType(DataType dataType) { public static void checkSchema( List columnDescs, Map cassandraColumnDescMap) { - if (columnDescs.size() != cassandraColumnDescMap.size()) { + if (columnDescs.size() > cassandraColumnDescMap.size()) { throw Status.FAILED_PRECONDITION - .withDescription("Don't match in the number of columns in the table") + .withDescription( + "The columns of the sink must be equal to or a superset of the target table's columns.") .asRuntimeException(); } for (ColumnDesc columnDesc : columnDescs) { diff --git a/java/connector-node/risingwave-sink-deltalake/pom.xml b/java/connector-node/risingwave-sink-deltalake/pom.xml index bab3c5320fae2..9a89853ff9f39 100644 --- a/java/connector-node/risingwave-sink-deltalake/pom.xml +++ b/java/connector-node/risingwave-sink-deltalake/pom.xml @@ -18,7 +18,7 @@ 11 11 - 1.12.3 + 1.14.0 true diff --git a/java/connector-node/risingwave-sink-es-7/pom.xml b/java/connector-node/risingwave-sink-es-7/pom.xml index 9c8515098d7d8..4ff4bd76ef109 100644 --- a/java/connector-node/risingwave-sink-es-7/pom.xml +++ b/java/connector-node/risingwave-sink-es-7/pom.xml @@ -51,6 +51,14 @@ org.elasticsearch.client elasticsearch-rest-high-level-client + + org.opensearch + opensearch + + + org.opensearch.client + opensearch-rest-high-level-client + org.apache.httpcomponents httpclient diff --git a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/BulkListener.java b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/BulkListener.java new file mode 100644 index 0000000000000..4ce1165ba1baf --- /dev/null +++ b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/BulkListener.java @@ -0,0 +1,97 @@ +/* + * Copyright 2024 RisingWave Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.risingwave.connector; + +import com.risingwave.connector.EsSink.RequestTracker; +import org.elasticsearch.action.bulk.BulkRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BulkListener + implements org.elasticsearch.action.bulk.BulkProcessor.Listener, + org.opensearch.action.bulk.BulkProcessor.Listener { + private static final Logger LOG = LoggerFactory.getLogger(EsSink.class); + private final RequestTracker requestTracker; + + public BulkListener(RequestTracker requestTracker) { + this.requestTracker = requestTracker; + } + + @Override + public void beforeBulk(long executionId, org.elasticsearch.action.bulk.BulkRequest request) { + LOG.debug("Sending bulk of {} actions to Elasticsearch.", request.numberOfActions()); + } + + @Override + public void afterBulk( + long executionId, + org.elasticsearch.action.bulk.BulkRequest request, + org.elasticsearch.action.bulk.BulkResponse response) { + if (response.hasFailures()) { + String errMessage = + String.format( + "Bulk of %d actions failed. Failure: %s", + request.numberOfActions(), response.buildFailureMessage()); + this.requestTracker.addErrResult(errMessage); + } else { + this.requestTracker.addOkResult(request.numberOfActions()); + LOG.debug("Sent bulk of {} actions to Elasticsearch.", request.numberOfActions()); + } + } + + /** This method is called when the bulk failed and raised a Throwable */ + @Override + public void afterBulk(long executionId, BulkRequest request, Throwable failure) { + String errMessage = + String.format( + "Bulk of %d actions failed. Failure: %s", + request.numberOfActions(), failure.getMessage()); + this.requestTracker.addErrResult(errMessage); + } + + @Override + public void beforeBulk(long executionId, org.opensearch.action.bulk.BulkRequest request) { + LOG.debug("Sending bulk of {} actions to Opensearch.", request.numberOfActions()); + } + + @Override + public void afterBulk( + long executionId, + org.opensearch.action.bulk.BulkRequest request, + org.opensearch.action.bulk.BulkResponse response) { + if (response.hasFailures()) { + String errMessage = + String.format( + "Bulk of %d actions failed. Failure: %s", + request.numberOfActions(), response.buildFailureMessage()); + this.requestTracker.addErrResult(errMessage); + } else { + this.requestTracker.addOkResult(request.numberOfActions()); + LOG.debug("Sent bulk of {} actions to Opensearch.", request.numberOfActions()); + } + } + + @Override + public void afterBulk( + long executionId, org.opensearch.action.bulk.BulkRequest request, Throwable failure) { + String errMessage = + String.format( + "Bulk of %d actions failed. 
Failure: %s", + request.numberOfActions(), failure.getMessage()); + this.requestTracker.addErrResult(errMessage); + } +} diff --git a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/BulkRequestConsumerFactory.java b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/BulkProcessorAdapter.java similarity index 58% rename from java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/BulkRequestConsumerFactory.java rename to java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/BulkProcessorAdapter.java index e26248b5fef74..d72ebe2833953 100644 --- a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/BulkRequestConsumerFactory.java +++ b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/BulkProcessorAdapter.java @@ -16,14 +16,14 @@ package com.risingwave.connector; -import java.util.function.BiConsumer; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.bulk.BulkRequest; -import org.elasticsearch.action.bulk.BulkResponse; +import java.util.concurrent.TimeUnit; -/** - * {@link BulkRequestConsumerFactory} is used to bridge incompatible Elasticsearch Java API calls - * across different Elasticsearch versions. - */ -interface BulkRequestConsumerFactory - extends BiConsumer> {} +public interface BulkProcessorAdapter { + public void addRow(String index, String key, String doc); + + public void deleteRow(String index, String key); + + public void flush(); + + public void awaitClose(long timeout, TimeUnit unit) throws InterruptedException; +} diff --git a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/ElasticBulkProcessorAdapter.java b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/ElasticBulkProcessorAdapter.java new file mode 100644 index 0000000000000..de6ab3414f65a --- /dev/null +++ b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/ElasticBulkProcessorAdapter.java @@ -0,0 +1,91 @@ +/* + * Copyright 2024 RisingWave Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.risingwave.connector; + +import com.risingwave.connector.EsSink.RequestTracker; +import java.util.concurrent.TimeUnit; +import org.elasticsearch.action.bulk.BackoffPolicy; +import org.elasticsearch.action.bulk.BulkProcessor; +import org.elasticsearch.action.delete.DeleteRequest; +import org.elasticsearch.action.update.UpdateRequest; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xcontent.XContentType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ElasticBulkProcessorAdapter implements BulkProcessorAdapter { + private static final Logger LOG = LoggerFactory.getLogger(EsSink.class); + BulkProcessor esBulkProcessor; + private final RequestTracker requestTracker; + + public ElasticBulkProcessorAdapter( + RequestTracker requestTracker, ElasticRestHighLevelClientAdapter client) { + BulkProcessor.Builder builder = + BulkProcessor.builder( + (bulkRequest, bulkResponseActionListener) -> + client.bulkAsync( + bulkRequest, + RequestOptions.DEFAULT, + bulkResponseActionListener), + new BulkListener(requestTracker)); + // Possible feature: move these to config + // execute the bulk every 10 000 requests + builder.setBulkActions(1000); + // flush the bulk every 5mb + builder.setBulkSize(new ByteSizeValue(5, ByteSizeUnit.MB)); + // flush the bulk every 5 seconds whatever the number of requests + builder.setFlushInterval(TimeValue.timeValueSeconds(5)); + // Set the number of concurrent requests + builder.setConcurrentRequests(1); + // Set a custom backoff policy which will initially wait for 100ms, increase exponentially + // and retries up to three times. + builder.setBackoffPolicy( + BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3)); + this.esBulkProcessor = builder.build(); + this.requestTracker = requestTracker; + } + + @Override + public void flush() { + esBulkProcessor.flush(); + } + + @Override + public void awaitClose(long timeout, TimeUnit unit) throws InterruptedException { + esBulkProcessor.awaitClose(timeout, unit); + } + + @Override + public void addRow(String index, String key, String doc) { + UpdateRequest updateRequest; + updateRequest = new UpdateRequest(index, "_doc", key).doc(doc, XContentType.JSON); + updateRequest.docAsUpsert(true); + this.requestTracker.addWriteTask(); + this.esBulkProcessor.add(updateRequest); + } + + @Override + public void deleteRow(String index, String key) { + DeleteRequest deleteRequest; + deleteRequest = new DeleteRequest(index, "_doc", key); + this.requestTracker.addWriteTask(); + this.esBulkProcessor.add(deleteRequest); + } +} diff --git a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/ElasticRestHighLevelClientAdapter.java b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/ElasticRestHighLevelClientAdapter.java new file mode 100644 index 0000000000000..c64def3bef8a7 --- /dev/null +++ b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/ElasticRestHighLevelClientAdapter.java @@ -0,0 +1,89 @@ +/* + * Copyright 2024 RisingWave Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.risingwave.connector; + +import java.io.IOException; +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.bulk.BulkRequest; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Cancellable; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.RestClient; +import org.elasticsearch.client.RestClientBuilder; +import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.client.RestHighLevelClientBuilder; + +public class ElasticRestHighLevelClientAdapter implements AutoCloseable { + RestHighLevelClient esClient; + + private static RestClientBuilder configureRestClientBuilder( + RestClientBuilder builder, EsSinkConfig config) { + // Possible config: + // 1. Connection path prefix + // 2. Username and password + if (config.getPassword() != null && config.getUsername() != null) { + final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials( + AuthScope.ANY, + new UsernamePasswordCredentials(config.getUsername(), config.getPassword())); + builder.setHttpClientConfigCallback( + httpClientBuilder -> + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)); + } + // 3. 
Timeout + return builder; + } + + public ElasticRestHighLevelClientAdapter(HttpHost host, EsSinkConfig config) { + this.esClient = + new RestHighLevelClientBuilder( + configureRestClientBuilder(RestClient.builder(host), config) + .build()) + .setApiCompatibilityMode(true) + .build(); + } + + @Override + public void close() throws IOException { + esClient.close(); + } + + public boolean ping(RequestOptions options) throws IOException { + boolean flag = esClient.ping(options); + return flag; + } + + public Cancellable bulkAsync( + BulkRequest bulkRequest, + RequestOptions options, + ActionListener listener) { + Cancellable cancellable = esClient.bulkAsync(bulkRequest, options, listener); + return cancellable; + } + + public SearchResponse search(SearchRequest searchRequest, RequestOptions options) + throws IOException { + return this.esClient.search(searchRequest, options); + } +} diff --git a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/EsSink.java b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/EsSink.java index cc5977a9c208c..315fc800a2ef0 100644 --- a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/EsSink.java +++ b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/EsSink.java @@ -1,16 +1,18 @@ -// Copyright 2024 RisingWave Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +/* + * Copyright 2024 RisingWave Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.risingwave.connector; @@ -25,25 +27,6 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import org.apache.http.HttpHost; -import org.apache.http.auth.AuthScope; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.client.CredentialsProvider; -import org.apache.http.impl.client.BasicCredentialsProvider; -import org.elasticsearch.action.bulk.BackoffPolicy; -import org.elasticsearch.action.bulk.BulkProcessor; -import org.elasticsearch.action.bulk.BulkRequest; -import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.delete.DeleteRequest; -import org.elasticsearch.action.update.UpdateRequest; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestClientBuilder; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.client.RestHighLevelClientBuilder; -import org.elasticsearch.common.unit.ByteSizeUnit; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.xcontent.XContentType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,8 +48,7 @@ public class EsSink extends SinkWriterBase { private static final String ERROR_REPORT_TEMPLATE = "Error message %s"; private final EsSinkConfig config; - private BulkProcessor bulkProcessor; - private final RestHighLevelClient client; + private BulkProcessorAdapter bulkProcessor; // Used to handle the return message of ES and throw errors private final RequestTracker requestTracker; @@ -167,156 +149,36 @@ public EsSink(EsSinkConfig config, TableSchema tableSchema) { this.requestTracker = new RequestTracker(); // ApiCompatibilityMode is enabled to ensure the client can talk to newer version es sever. - this.client = - new RestHighLevelClientBuilder( - configureRestClientBuilder(RestClient.builder(host), config) - .build()) - .setApiCompatibilityMode(true) - .build(); - // Test connection - try { - boolean isConnected = this.client.ping(RequestOptions.DEFAULT); - if (!isConnected) { - throw Status.INVALID_ARGUMENT - .withDescription("Cannot connect to " + config.getUrl()) - .asRuntimeException(); - } - } catch (Exception e) { - throw Status.INTERNAL.withDescription(e.getMessage()).asRuntimeException(); - } - this.bulkProcessor = createBulkProcessor(this.requestTracker); - } - - private static RestClientBuilder configureRestClientBuilder( - RestClientBuilder builder, EsSinkConfig config) { - // Possible config: - // 1. Connection path prefix - // 2. Username and password - if (config.getPassword() != null && config.getUsername() != null) { - final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - credentialsProvider.setCredentials( - AuthScope.ANY, - new UsernamePasswordCredentials(config.getUsername(), config.getPassword())); - builder.setHttpClientConfigCallback( - httpClientBuilder -> - httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)); - } - // 3. 
Timeout - return builder; - } - - private BulkProcessor.Builder applyBulkConfig( - RestHighLevelClient client, EsSinkConfig config, BulkProcessor.Listener listener) { - BulkProcessor.Builder builder = - BulkProcessor.builder( - (BulkRequestConsumerFactory) - (bulkRequest, bulkResponseActionListener) -> - client.bulkAsync( - bulkRequest, - RequestOptions.DEFAULT, - bulkResponseActionListener), - listener); - // Possible feature: move these to config - // execute the bulk every 10 000 requests - builder.setBulkActions(1000); - // flush the bulk every 5mb - builder.setBulkSize(new ByteSizeValue(5, ByteSizeUnit.MB)); - // flush the bulk every 5 seconds whatever the number of requests - builder.setFlushInterval(TimeValue.timeValueSeconds(5)); - // Set the number of concurrent requests - builder.setConcurrentRequests(1); - // Set a custom backoff policy which will initially wait for 100ms, increase exponentially - // and retries up to three times. - builder.setBackoffPolicy( - BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3)); - return builder; - } - - private BulkProcessor createBulkProcessor(RequestTracker requestTracker) { - BulkProcessor.Builder builder = - applyBulkConfig(this.client, this.config, new BulkListener(requestTracker)); - return builder.build(); - } - - private class BulkListener implements BulkProcessor.Listener { - private final RequestTracker requestTracker; - - public BulkListener(RequestTracker requestTracker) { - this.requestTracker = requestTracker; - } - - /** This method is called just before bulk is executed. */ - @Override - public void beforeBulk(long executionId, BulkRequest request) { - LOG.debug("Sending bulk of {} actions to Elasticsearch.", request.numberOfActions()); - } - - /** This method is called after bulk execution. */ - @Override - public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { - if (response.hasFailures()) { - String errMessage = - String.format( - "Bulk of %d actions failed. Failure: %s", - request.numberOfActions(), response.buildFailureMessage()); - this.requestTracker.addErrResult(errMessage); - } else { - this.requestTracker.addOkResult(request.numberOfActions()); - LOG.debug("Sent bulk of {} actions to Elasticsearch.", request.numberOfActions()); - } - } - - /** This method is called when the bulk failed and raised a Throwable */ - @Override - public void afterBulk(long executionId, BulkRequest request, Throwable failure) { - String errMessage = - String.format( - "Bulk of %d actions failed. 
Failure: %s", - request.numberOfActions(), failure.getMessage()); - this.requestTracker.addErrResult(errMessage); - } - } - - private void processUpsert(SinkRow row) throws JsonMappingException, JsonProcessingException { - final String index = (String) row.get(0); - final String key = (String) row.get(1); - String doc = (String) row.get(2); - - UpdateRequest updateRequest; - if (config.getIndex() != null) { - updateRequest = - new UpdateRequest(config.getIndex(), "_doc", key).doc(doc, XContentType.JSON); + if (config.getConnector().equals("elasticsearch")) { + ElasticRestHighLevelClientAdapter client = + new ElasticRestHighLevelClientAdapter(host, config); + this.bulkProcessor = new ElasticBulkProcessorAdapter(this.requestTracker, client); + } else if (config.getConnector().equals("opensearch")) { + OpensearchRestHighLevelClientAdapter client = + new OpensearchRestHighLevelClientAdapter(host, config); + this.bulkProcessor = new OpensearchBulkProcessorAdapter(this.requestTracker, client); } else { - updateRequest = new UpdateRequest(index, "_doc", key).doc(doc, XContentType.JSON); + throw new RuntimeException("Sink type must be elasticsearch or opensearch"); } - updateRequest.docAsUpsert(true); - this.requestTracker.addWriteTask(); - bulkProcessor.add(updateRequest); } - private void processDelete(SinkRow row) throws JsonMappingException, JsonProcessingException { - final String index = (String) row.get(0); + private void writeRow(SinkRow row) throws JsonMappingException, JsonProcessingException { final String key = (String) row.get(1); - - DeleteRequest deleteRequest; - if (config.getIndex() != null) { - deleteRequest = new DeleteRequest(config.getIndex(), "_doc", key); + String doc = (String) row.get(2); + final String index; + if (config.getIndex() == null) { + index = (String) row.get(0); } else { - deleteRequest = new DeleteRequest(index, "_doc", key); + index = config.getIndex(); } - this.requestTracker.addWriteTask(); - bulkProcessor.add(deleteRequest); - } - - private void writeRow(SinkRow row) throws JsonMappingException, JsonProcessingException { switch (row.getOp()) { case INSERT: case UPDATE_INSERT: - processUpsert(row); + this.bulkProcessor.addRow(index, key, doc); break; case DELETE: case UPDATE_DELETE: - processDelete(row); + this.bulkProcessor.deleteRow(index, key); break; default: throw Status.INVALID_ARGUMENT @@ -353,15 +215,10 @@ public void sync() { public void drop() { try { bulkProcessor.awaitClose(100, TimeUnit.SECONDS); - client.close(); } catch (Exception e) { throw io.grpc.Status.INTERNAL .withDescription(String.format(ERROR_REPORT_TEMPLATE, e.getMessage())) .asRuntimeException(); } } - - public RestHighLevelClient getClient() { - return client; - } } diff --git a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/EsSinkFactory.java b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/EsSinkFactory.java index f3fa3bfa16c3b..03e888a892df3 100644 --- a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/EsSinkFactory.java +++ b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/EsSinkFactory.java @@ -1,16 +1,18 @@ -// Copyright 2024 RisingWave Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +/* + * Copyright 2024 RisingWave Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.risingwave.connector; @@ -23,17 +25,8 @@ import com.risingwave.proto.Catalog; import com.risingwave.proto.Data; import io.grpc.Status; -import java.io.IOException; import java.util.Map; import org.apache.http.HttpHost; -import org.apache.http.auth.AuthScope; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.client.CredentialsProvider; -import org.apache.http.impl.client.BasicCredentialsProvider; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestClientBuilder; -import org.elasticsearch.client.RestHighLevelClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -90,34 +83,30 @@ public void validate( } // 2. check connection - RestClientBuilder builder = RestClient.builder(host); - if (config.getPassword() != null && config.getUsername() != null) { - final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - credentialsProvider.setCredentials( - AuthScope.ANY, - new UsernamePasswordCredentials(config.getUsername(), config.getPassword())); - builder.setHttpClientConfigCallback( - httpClientBuilder -> - httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)); - } - RestHighLevelClient client = new RestHighLevelClient(builder); - // Test connection try { - boolean isConnected = client.ping(RequestOptions.DEFAULT); - if (!isConnected) { - throw Status.INVALID_ARGUMENT - .withDescription("Cannot connect to " + config.getUrl()) - .asRuntimeException(); + if (config.getConnector().equals("elasticsearch")) { + ElasticRestHighLevelClientAdapter esClient = + new ElasticRestHighLevelClientAdapter(host, config); + if (!esClient.ping(org.elasticsearch.client.RequestOptions.DEFAULT)) { + throw Status.INVALID_ARGUMENT + .withDescription("Cannot connect to " + config.getUrl()) + .asRuntimeException(); + } + esClient.close(); + } else if (config.getConnector().equals("opensearch")) { + OpensearchRestHighLevelClientAdapter opensearchClient = + new OpensearchRestHighLevelClientAdapter(host, config); + if (!opensearchClient.ping(org.opensearch.client.RequestOptions.DEFAULT)) { + throw Status.INVALID_ARGUMENT + .withDescription("Cannot connect to " + config.getUrl()) + .asRuntimeException(); + } + opensearchClient.close(); + } else { + throw new RuntimeException("Sink type must be elasticsearch or opensearch"); } } catch (Exception e) { throw Status.INTERNAL.withDescription(e.getMessage()).asRuntimeException(); } - - // 3. 
close client - try { - client.close(); - } catch (IOException e) { - throw Status.INTERNAL.withDescription(e.getMessage()).asRuntimeException(); - } } } diff --git a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/OpensearchBulkProcessorAdapter.java b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/OpensearchBulkProcessorAdapter.java new file mode 100644 index 0000000000000..d5d8cdc3d237d --- /dev/null +++ b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/OpensearchBulkProcessorAdapter.java @@ -0,0 +1,91 @@ +/* + * Copyright 2024 RisingWave Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.risingwave.connector; + +import com.risingwave.connector.EsSink.RequestTracker; +import java.util.concurrent.TimeUnit; +import org.opensearch.action.bulk.BackoffPolicy; +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.client.RequestOptions; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class OpensearchBulkProcessorAdapter implements BulkProcessorAdapter { + private static final Logger LOG = LoggerFactory.getLogger(EsSink.class); + private final RequestTracker requestTracker; + BulkProcessor opensearchBulkProcessor; + + public OpensearchBulkProcessorAdapter( + RequestTracker requestTracker, OpensearchRestHighLevelClientAdapter client) { + BulkProcessor.Builder builder = + BulkProcessor.builder( + (bulkRequest, bulkResponseActionListener) -> + client.bulkAsync( + bulkRequest, + RequestOptions.DEFAULT, + bulkResponseActionListener), + new BulkListener(requestTracker)); + // Possible feature: move these to config + // execute the bulk every 10 000 requests + builder.setBulkActions(1000); + // flush the bulk every 5mb + builder.setBulkSize(new ByteSizeValue(5, ByteSizeUnit.MB)); + // flush the bulk every 5 seconds whatever the number of requests + builder.setFlushInterval(TimeValue.timeValueSeconds(5)); + // Set the number of concurrent requests + builder.setConcurrentRequests(1); + // Set a custom backoff policy which will initially wait for 100ms, increase exponentially + // and retries up to three times. 
+ builder.setBackoffPolicy( + BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3)); + this.opensearchBulkProcessor = builder.build(); + this.requestTracker = requestTracker; + } + + @Override + public void flush() { + opensearchBulkProcessor.flush(); + } + + @Override + public void awaitClose(long timeout, TimeUnit unit) throws InterruptedException { + opensearchBulkProcessor.awaitClose(timeout, unit); + } + + @Override + public void addRow(String index, String key, String doc) { + UpdateRequest updateRequest; + updateRequest = new UpdateRequest(index, key).doc(doc, XContentType.JSON); + updateRequest.docAsUpsert(true); + this.requestTracker.addWriteTask(); + this.opensearchBulkProcessor.add(updateRequest); + } + + @Override + public void deleteRow(String index, String key) { + DeleteRequest deleteRequest; + deleteRequest = new DeleteRequest(index, key); + this.requestTracker.addWriteTask(); + this.opensearchBulkProcessor.add(deleteRequest); + } +} diff --git a/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/OpensearchRestHighLevelClientAdapter.java b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/OpensearchRestHighLevelClientAdapter.java new file mode 100644 index 0000000000000..5f3773b0a91aa --- /dev/null +++ b/java/connector-node/risingwave-sink-es-7/src/main/java/com/risingwave/connector/OpensearchRestHighLevelClientAdapter.java @@ -0,0 +1,78 @@ +/* + * Copyright 2024 RisingWave Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.risingwave.connector; + +import java.io.IOException; +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.client.Cancellable; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.core.action.ActionListener; + +public class OpensearchRestHighLevelClientAdapter implements AutoCloseable { + RestHighLevelClient opensearchClient; + + private static RestClientBuilder configureRestClientBuilder( + RestClientBuilder builder, EsSinkConfig config) { + // Possible config: + // 1. Connection path prefix + // 2. Username and password + if (config.getPassword() != null && config.getUsername() != null) { + final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials( + AuthScope.ANY, + new UsernamePasswordCredentials(config.getUsername(), config.getPassword())); + builder.setHttpClientConfigCallback( + httpClientBuilder -> + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)); + } + // 3. 
Timeout + return builder; + } + + public OpensearchRestHighLevelClientAdapter(HttpHost host, EsSinkConfig config) { + this.opensearchClient = + new org.opensearch.client.RestHighLevelClient( + configureRestClientBuilder( + org.opensearch.client.RestClient.builder(host), config)); + } + + @Override + public void close() throws IOException { + opensearchClient.close(); + } + + public boolean ping(org.opensearch.client.RequestOptions options) throws IOException { + boolean flag = opensearchClient.ping(options); + return flag; + } + + public Cancellable bulkAsync( + BulkRequest bulkRequest, + RequestOptions options, + ActionListener listener) { + Cancellable cancellable = opensearchClient.bulkAsync(bulkRequest, options, listener); + return cancellable; + } +} diff --git a/java/connector-node/risingwave-sink-iceberg/pom.xml b/java/connector-node/risingwave-sink-iceberg/pom.xml index a491823bb07f8..b2be8c31e12df 100644 --- a/java/connector-node/risingwave-sink-iceberg/pom.xml +++ b/java/connector-node/risingwave-sink-iceberg/pom.xml @@ -16,7 +16,7 @@ risingwave-sink-iceberg - 1.4.1 + 1.5.2 11 11 true diff --git a/java/connector-node/risingwave-sink-jdbc/src/main/java/com/risingwave/connector/JDBCSink.java b/java/connector-node/risingwave-sink-jdbc/src/main/java/com/risingwave/connector/JDBCSink.java index 399a312758e3b..10aa371c50aec 100644 --- a/java/connector-node/risingwave-sink-jdbc/src/main/java/com/risingwave/connector/JDBCSink.java +++ b/java/connector-node/risingwave-sink-jdbc/src/main/java/com/risingwave/connector/JDBCSink.java @@ -254,10 +254,7 @@ public void prepareUpsert(SinkRow row) { break; case UPDATE_INSERT: if (!updateFlag) { - throw Status.FAILED_PRECONDITION - .withDescription( - "an UPDATE_DELETE should precede an UPDATE_INSERT") - .asRuntimeException(); + LOG.warn("Missing an UPDATE_DELETE precede an UPDATE_INSERT"); } jdbcDialect.bindUpsertStatement(upsertStatement, conn, tableSchema, row); updateFlag = false; @@ -364,10 +361,7 @@ public void beginEpoch(long epoch) {} @Override public Optional barrier(boolean isCheckpoint) { if (updateFlag) { - throw Status.FAILED_PRECONDITION - .withDescription( - "expected UPDATE_INSERT to complete an UPDATE operation, got `sync`") - .asRuntimeException(); + LOG.warn("expect an UPDATE_INSERT to complete an UPDATE operation, got `sync`"); } return Optional.empty(); } diff --git a/java/pom.xml b/java/pom.xml index 5f0327bf8ffc9..d2dac0a643830 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -64,7 +64,7 @@ UTF-8 UTF-8 3.21.1 - 1.53.0 + 1.64.0 2.10 0.1.0-SNAPSHOT 2.43.0 @@ -75,10 +75,11 @@ 1.10.0 3.12.0 2.4.2.Final - 2.13.5 + 2.15.0 3.3.1 - 3.3.3 + 3.4.0 7.17.19 + 2.11.1 4.15.0 1.18.0 1.17.6 @@ -88,6 +89,8 @@ 3.45.0.0 2.21.42 3.1.3 + 12.0.10 + 1.1.10.5 @@ -195,6 +198,22 @@ elasticsearch-rest-high-level-client ${elasticsearch.version} + + org.opensearch + opensearch + ${opensearch.version} + + + org.opensearch.client + opensearch-rest-high-level-client + ${opensearch.version} + + + org.apache.httpcomponents + httpcore-nio + + + io.grpc grpc-netty-shaded @@ -378,6 +397,26 @@ hadoop-mapreduce-client-jobclient ${hadoop.version} + + org.eclipse.jetty + jetty-client + ${jetty.version} + + + org.eclipse.jetty + jetty-util + ${jetty.version} + + + org.eclipse.jetty + jetty-http + ${jetty.version} + + + org.xerial.snappy + snappy-java + ${snappy.version} + org.apache.spark spark-sql_2.12 diff --git a/proto/catalog.proto b/proto/catalog.proto index 79be927fc4d16..395a03281b544 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -6,6 +6,7 @@ 
import "common.proto"; import "data.proto"; import "expr.proto"; import "plan_common.proto"; +import "secret.proto"; option java_package = "com.risingwave.proto"; option optimize_for = SPEED; @@ -84,7 +85,7 @@ message StreamSourceInfo { map format_encode_options = 14; // Handle the source relies on any sceret. The key is the propertity name and the value is the secret id and type. - map secret_ref = 16; + map secret_ref = 16; } message Source { @@ -181,7 +182,7 @@ message Sink { CreateType create_type = 24; // Handle the sink relies on any sceret. The key is the propertity name and the value is the secret id and type. - map secret_ref = 25; + map secret_ref = 25; repeated plan_common.ColumnCatalog original_target_columns = 26; } @@ -452,14 +453,3 @@ message Secret { uint32 owner = 5; uint32 schema_id = 6; } - -message SecretRef { - enum RefAsType { - UNSPECIFIED = 0; - TEXT = 1; - // AS FILE - FILE = 2; - } - uint32 secret_id = 1; - RefAsType ref_as = 2; -} diff --git a/proto/secret.proto b/proto/secret.proto index f5065009519fd..8e4e1b228d6c5 100644 --- a/proto/secret.proto +++ b/proto/secret.proto @@ -18,3 +18,14 @@ message Secret { SecretHashicropValutBackend hashicorp_vault = 2; } } + +message SecretRef { + enum RefAsType { + UNSPECIFIED = 0; + TEXT = 1; + // AS FILE + FILE = 2; + } + uint32 secret_id = 1; + RefAsType ref_as = 2; +} diff --git a/src/batch/src/exchange_source.rs b/src/batch/src/exchange_source.rs index b602b14d5c018..409061594338d 100644 --- a/src/batch/src/exchange_source.rs +++ b/src/batch/src/exchange_source.rs @@ -15,9 +15,10 @@ use std::fmt::Debug; use std::future::Future; +use futures_async_stream::try_stream; use risingwave_common::array::DataChunk; -use crate::error::Result; +use crate::error::{BatchError, Result}; use crate::execution::grpc_exchange::GrpcExchangeSource; use crate::execution::local_exchange::LocalExchangeSource; use crate::executor::test_utils::FakeExchangeSource; @@ -54,4 +55,16 @@ impl ExchangeSourceImpl { ExchangeSourceImpl::Fake(fake) => fake.get_task_id(), } } + + #[try_stream(boxed, ok = DataChunk, error = BatchError)] + pub(crate) async fn take_data_stream(self) { + let mut source = self; + loop { + match source.take_data().await { + Ok(Some(chunk)) => yield chunk, + Ok(None) => break, + Err(e) => return Err(e), + } + } + } } diff --git a/src/batch/src/executor/merge_sort.rs b/src/batch/src/executor/merge_sort.rs new file mode 100644 index 0000000000000..1f5c8f3e5fc2c --- /dev/null +++ b/src/batch/src/executor/merge_sort.rs @@ -0,0 +1,195 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::mem; +use std::sync::Arc; + +use futures_async_stream::try_stream; +use futures_util::StreamExt; +use itertools::Itertools; +use risingwave_common::array::DataChunk; +use risingwave_common::catalog::Schema; +use risingwave_common::memory::{MemMonitoredHeap, MemoryContext, MonitoredGlobalAlloc}; +use risingwave_common::types::ToOwnedDatum; +use risingwave_common::util::sort_util::{ColumnOrder, HeapElem}; +use risingwave_common_estimate_size::EstimateSize; + +use super::{BoxedDataChunkStream, BoxedExecutor, Executor}; +use crate::error::{BatchError, Result}; + +pub struct MergeSortExecutor { + inputs: Vec<BoxedExecutor>, + column_orders: Arc<Vec<ColumnOrder>>, + identity: String, + schema: Schema, + chunk_size: usize, + mem_context: MemoryContext, + min_heap: MemMonitoredHeap<HeapElem>, + current_chunks: Vec<Option<DataChunk>, MonitoredGlobalAlloc>, +} + +impl Executor for MergeSortExecutor { + fn schema(&self) -> &Schema { + &self.schema + } + + fn identity(&self) -> &str { + &self.identity + } + + fn execute(self: Box<Self>) -> BoxedDataChunkStream { + self.do_execute() + } +} + +impl MergeSortExecutor { + #[try_stream(boxed, ok = DataChunk, error = BatchError)] + async fn do_execute(mut self: Box<Self>) { + let mut inputs = vec![]; + mem::swap(&mut inputs, &mut self.inputs); + let mut input_streams = inputs + .into_iter() + .map(|input| input.execute()) + .collect_vec(); + for (input_idx, input_stream) in input_streams.iter_mut().enumerate() { + match input_stream.next().await { + Some(chunk) => { + let chunk = chunk?; + self.current_chunks.push(Some(chunk)); + if let Some(chunk) = &self.current_chunks[input_idx] { + // We assume that we would always get a non-empty chunk from the upstream of + // exchange, therefore we are sure that there is at least + // one visible row. + let next_row_idx = chunk.next_visible_row_idx(0); + self.push_row_into_heap(input_idx, next_row_idx.unwrap()); + } + } + None => { + self.current_chunks.push(None); + } + } + } + + while !self.min_heap.is_empty() { + // It is possible that we cannot produce this much as + // we may run out of input data chunks from sources.
+ let mut want_to_produce = self.chunk_size; + + let mut builders: Vec<_> = self + .schema + .fields + .iter() + .map(|field| field.data_type.create_array_builder(self.chunk_size)) + .collect(); + let mut array_len = 0; + while want_to_produce > 0 && !self.min_heap.is_empty() { + let top_elem = self.min_heap.pop().unwrap(); + let child_idx = top_elem.chunk_idx(); + let cur_chunk = top_elem.chunk(); + let row_idx = top_elem.elem_idx(); + for (idx, builder) in builders.iter_mut().enumerate() { + let chunk_arr = cur_chunk.column_at(idx); + let chunk_arr = chunk_arr.as_ref(); + let datum = chunk_arr.value_at(row_idx).to_owned_datum(); + builder.append(&datum); + } + want_to_produce -= 1; + array_len += 1; + // check whether we have another row from the same chunk being popped + let possible_next_row_idx = cur_chunk.next_visible_row_idx(row_idx + 1); + match possible_next_row_idx { + Some(next_row_idx) => { + self.push_row_into_heap(child_idx, next_row_idx); + } + None => { + self.get_input_chunk(&mut input_streams, child_idx).await?; + if let Some(chunk) = &self.current_chunks[child_idx] { + let next_row_idx = chunk.next_visible_row_idx(0); + self.push_row_into_heap(child_idx, next_row_idx.unwrap()); + } + } + } + } + + let columns = builders + .into_iter() + .map(|builder| builder.finish().into()) + .collect::<Vec<_>>(); + let chunk = DataChunk::new(columns, array_len); + yield chunk + } + } + + async fn get_input_chunk( + &mut self, + input_streams: &mut Vec<BoxedDataChunkStream>, + input_idx: usize, + ) -> Result<()> { + assert!(input_idx < input_streams.len()); + let res = input_streams[input_idx].next().await; + let old = match res { + Some(chunk) => { + let chunk = chunk?; + assert_ne!(chunk.cardinality(), 0); + let new_chunk_size = chunk.estimated_heap_size() as i64; + let old = std::mem::replace(&mut self.current_chunks[input_idx], Some(chunk)); + self.mem_context.add(new_chunk_size); + old + } + None => std::mem::take(&mut self.current_chunks[input_idx]), + }; + + if let Some(chunk) = old { + // Reduce the heap size of retired chunk + self.mem_context.add(-(chunk.estimated_heap_size() as i64)); + } + + Ok(()) + } + + fn push_row_into_heap(&mut self, input_idx: usize, row_idx: usize) { + assert!(input_idx < self.current_chunks.len()); + let chunk_ref = self.current_chunks[input_idx].as_ref().unwrap(); + self.min_heap.push(HeapElem::new( + self.column_orders.clone(), + chunk_ref.clone(), + input_idx, + row_idx, + None, + )); + } +} + +impl MergeSortExecutor { + pub fn new( + inputs: Vec<BoxedExecutor>, + column_orders: Arc<Vec<ColumnOrder>>, + schema: Schema, + identity: String, + chunk_size: usize, + mem_context: MemoryContext, + ) -> Self { + let inputs_num = inputs.len(); + Self { + inputs, + column_orders, + identity, + schema, + chunk_size, + min_heap: MemMonitoredHeap::with_capacity(inputs_num, mem_context.clone()), + current_chunks: Vec::with_capacity_in(inputs_num, mem_context.global_allocator()), + mem_context, + } + } +} diff --git a/src/batch/src/executor/merge_sort_exchange.rs b/src/batch/src/executor/merge_sort_exchange.rs index e2779967dbcbe..3b5647729db25 100644 --- a/src/batch/src/executor/merge_sort_exchange.rs +++ b/src/batch/src/executor/merge_sort_exchange.rs @@ -17,18 +17,15 @@ use std::sync::Arc; use futures_async_stream::try_stream; use risingwave_common::array::DataChunk; use risingwave_common::catalog::{Field, Schema}; -use risingwave_common::memory::{MemMonitoredHeap, MemoryContext, MonitoredGlobalAlloc}; -use risingwave_common::types::ToOwnedDatum; -use risingwave_common::util::sort_util::{ColumnOrder, HeapElem}; -use
risingwave_common_estimate_size::EstimateSize; +use risingwave_common::memory::MemoryContext; +use risingwave_common::util::sort_util::ColumnOrder; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::PbExchangeSource; use crate::error::{BatchError, Result}; -use crate::exchange_source::ExchangeSourceImpl; use crate::executor::{ BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, CreateSource, DefaultCreateSource, - Executor, ExecutorBuilder, + Executor, ExecutorBuilder, MergeSortExecutor, WrapStreamExecutor, }; use crate::task::{BatchTaskContext, TaskId}; @@ -38,23 +35,16 @@ pub type MergeSortExchangeExecutor = MergeSortExchangeExecutorImpl { context: C, - /// keeps one data chunk of each source if any - source_inputs: Vec, MonitoredGlobalAlloc>, column_orders: Arc>, - min_heap: MemMonitoredHeap, proto_sources: Vec, - sources: Vec, // impl /// Mock-able `CreateSource`. source_creators: Vec, schema: Schema, - #[expect(dead_code)] task_id: TaskId, identity: String, /// The maximum size of the chunk produced by executor at a time. chunk_size: usize, mem_ctx: MemoryContext, - #[expect(dead_code)] - alloc: MonitoredGlobalAlloc, } impl MergeSortExchangeExecutorImpl { @@ -70,69 +60,18 @@ impl MergeSortExchangeEx chunk_size: usize, ) -> Self { let mem_ctx = context.create_executor_mem_context(&identity); - let alloc = MonitoredGlobalAlloc::with_memory_context(mem_ctx.clone()); - - let source_inputs = { - let mut v = Vec::with_capacity_in(proto_sources.len(), alloc.clone()); - (0..proto_sources.len()).for_each(|_| v.push(None)); - v - }; - - let num_sources = proto_sources.len(); Self { context, - source_inputs, column_orders, - min_heap: MemMonitoredHeap::with_capacity(num_sources, mem_ctx.clone()), proto_sources, - sources: Vec::with_capacity(num_sources), source_creators, schema, task_id, identity, chunk_size, mem_ctx, - alloc, - } - } - - /// We assume that the source would always send `Some(chunk)` with cardinality > 0 - /// or `None`, but never `Some(chunk)` with cardinality == 0. - async fn get_source_chunk(&mut self, source_idx: usize) -> Result<()> { - assert!(source_idx < self.source_inputs.len()); - let res = self.sources[source_idx].take_data().await?; - let old = match res { - Some(chunk) => { - assert_ne!(chunk.cardinality(), 0); - let new_chunk_size = chunk.estimated_heap_size() as i64; - let old = std::mem::replace(&mut self.source_inputs[source_idx], Some(chunk)); - self.mem_ctx.add(new_chunk_size); - old - } - None => std::mem::take(&mut self.source_inputs[source_idx]), - }; - - if let Some(chunk) = old { - // Reduce the heap size of retired chunk - self.mem_ctx.add(-(chunk.estimated_heap_size() as i64)); } - - Ok(()) - } - - // Check whether there is indeed a chunk and there is a visible row sitting at `row_idx` - // in the chunk before calling this function. - fn push_row_into_heap(&mut self, source_idx: usize, row_idx: usize) { - assert!(source_idx < self.source_inputs.len()); - let chunk_ref = self.source_inputs[source_idx].as_ref().unwrap(); - self.min_heap.push(HeapElem::new( - self.column_orders.clone(), - chunk_ref.clone(), - source_idx, - row_idx, - None, - )); } } @@ -156,71 +95,31 @@ impl Executor /// `self.chunk_size` as the executor runs out of input from `sources`. 
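The refactor above moves the k-way merge out of the exchange executor into the new `MergeSortExecutor`, which keeps one cursor per sorted input in a memory-monitored min-heap and repeatedly pops the smallest head. As a standalone illustration only (plain `Vec<i32>` inputs and a std `BinaryHeap` instead of `DataChunk` streams and `MemMonitoredHeap`; all names here are hypothetical), the merge loop is roughly:

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

fn k_way_merge(inputs: Vec<Vec<i32>>) -> Vec<i32> {
    // Heap entries are (value, input index, offset within that input).
    let mut heap = BinaryHeap::new();
    for (i, input) in inputs.iter().enumerate() {
        if let Some(&v) = input.first() {
            heap.push(Reverse((v, i, 0usize)));
        }
    }

    let mut out = Vec::new();
    while let Some(Reverse((v, i, off))) = heap.pop() {
        out.push(v);
        // Refill from the same input, mirroring how the executor pushes the
        // next visible row (or fetches the next chunk) after popping a HeapElem.
        if let Some(&next) = inputs[i].get(off + 1) {
            heap.push(Reverse((next, i, off + 1)));
        }
    }
    out
}

fn main() {
    let merged = k_way_merge(vec![vec![1, 4, 7], vec![2, 5], vec![3, 6, 8]]);
    assert_eq!(merged, vec![1, 2, 3, 4, 5, 6, 7, 8]);
}
```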
impl MergeSortExchangeExecutorImpl { #[try_stream(boxed, ok = DataChunk, error = BatchError)] - async fn do_execute(mut self: Box) { + async fn do_execute(self: Box) { + let mut sources: Vec = vec![]; for source_idx in 0..self.proto_sources.len() { let new_source = self.source_creators[source_idx] .create_source(self.context.clone(), &self.proto_sources[source_idx]) .await?; - self.sources.push(new_source); - self.get_source_chunk(source_idx).await?; - if let Some(chunk) = &self.source_inputs[source_idx] { - // We assume that we would always get a non-empty chunk from the upstream of - // exchange, therefore we are sure that there is at least - // one visible row. - let next_row_idx = chunk.next_visible_row_idx(0); - self.push_row_into_heap(source_idx, next_row_idx.unwrap()); - } - } - // If there is no rows in the heap, - // we run out of input data chunks and emit `Done`. - while !self.min_heap.is_empty() { - // It is possible that we cannot produce this much as - // we may run out of input data chunks from sources. - let mut want_to_produce = self.chunk_size; + sources.push(Box::new(WrapStreamExecutor::new( + self.schema.clone(), + new_source.take_data_stream(), + ))); + } - let mut builders: Vec<_> = self - .schema() - .fields - .iter() - .map(|field| field.data_type.create_array_builder(self.chunk_size)) - .collect(); - let mut array_len = 0; - while want_to_produce > 0 && !self.min_heap.is_empty() { - let top_elem = self.min_heap.pop().unwrap(); - let child_idx = top_elem.chunk_idx(); - let cur_chunk = top_elem.chunk(); - let row_idx = top_elem.elem_idx(); - for (idx, builder) in builders.iter_mut().enumerate() { - let chunk_arr = cur_chunk.column_at(idx); - let chunk_arr = chunk_arr.as_ref(); - let datum = chunk_arr.value_at(row_idx).to_owned_datum(); - builder.append(&datum); - } - want_to_produce -= 1; - array_len += 1; - // check whether we have another row from the same chunk being popped - let possible_next_row_idx = cur_chunk.next_visible_row_idx(row_idx + 1); - match possible_next_row_idx { - Some(next_row_idx) => { - self.push_row_into_heap(child_idx, next_row_idx); - } - None => { - self.get_source_chunk(child_idx).await?; - if let Some(chunk) = &self.source_inputs[child_idx] { - let next_row_idx = chunk.next_visible_row_idx(0); - self.push_row_into_heap(child_idx, next_row_idx.unwrap()); - } - } - } - } + let merge_sort_executor = Box::new(MergeSortExecutor::new( + sources, + self.column_orders.clone(), + self.schema, + format!("MergeSortExecutor{}", &self.task_id.task_id), + self.chunk_size, + self.mem_ctx, + )); - let columns = builders - .into_iter() - .map(|builder| builder.finish().into()) - .collect::>(); - let chunk = DataChunk::new(columns, array_len); - yield chunk + #[for_await] + for chunk in merge_sort_executor.execute() { + yield chunk?; } } } diff --git a/src/batch/src/executor/mod.rs b/src/batch/src/executor/mod.rs index b77027327fe05..c19bc06c141b9 100644 --- a/src/batch/src/executor/mod.rs +++ b/src/batch/src/executor/mod.rs @@ -27,6 +27,7 @@ mod limit; mod log_row_seq_scan; mod managed; mod max_one_row; +mod merge_sort; mod merge_sort_exchange; mod order_by; mod project; @@ -60,6 +61,7 @@ pub use join::*; pub use limit::*; pub use managed::*; pub use max_one_row::*; +pub use merge_sort::*; pub use merge_sort_exchange::*; pub use order_by::*; pub use project::*; diff --git a/src/common/src/catalog/column.rs b/src/common/src/catalog/column.rs index 82d2f22f41cb4..bb6c8b7a39903 100644 --- a/src/common/src/catalog/column.rs +++ 
b/src/common/src/catalog/column.rs @@ -21,7 +21,7 @@ use risingwave_pb::plan_common::{ AdditionalColumn, ColumnDescVersion, PbColumnCatalog, PbColumnDesc, }; -use super::row_id_column_desc; +use super::{row_id_column_desc, USER_COLUMN_ID_OFFSET}; use crate::catalog::{cdc_table_name_column_desc, offset_column_desc, Field, ROW_ID_COLUMN_ID}; use crate::types::DataType; @@ -45,6 +45,10 @@ impl ColumnId { pub const fn placeholder() -> Self { Self(i32::MAX - 1) } + + pub const fn first_user_column() -> Self { + Self(USER_COLUMN_ID_OFFSET) + } } impl ColumnId { @@ -346,6 +350,11 @@ impl ColumnCatalog { self.column_desc.is_default() } + /// If the columns is an `INCLUDE ... AS ...` connector column. + pub fn is_connector_additional_column(&self) -> bool { + self.column_desc.additional_column.column_type.is_some() + } + /// Get a reference to the column desc's data type. pub fn data_type(&self) -> &DataType { &self.column_desc.data_type @@ -430,15 +439,30 @@ pub fn columns_extend(preserved_columns: &mut Vec, columns: Vec bool { - let mut column_ids = columns +pub fn debug_assert_column_ids_distinct(columns: &[ColumnCatalog]) { + debug_assert!( + columns + .iter() + .map(|c| c.column_id()) + .duplicates() + .next() + .is_none(), + "duplicate ColumnId found in source catalog. Columns: {columns:#?}" + ); +} + +/// FIXME: perhapts we should use sth like `ColumnIdGenerator::new_alter`, +/// However, the `SourceVersion` is problematic: It doesn't contain `next_col_id`. +/// (But for now this isn't a large problem, since drop column is not allowed for source yet..) +/// +/// Besides, the logic of column id handling is a mess. +/// In some places, we use `ColumnId::placeholder()`, and use `col_id_gen` to fill it at the end; +/// In other places, we create column id ad-hoc. +pub fn max_column_id(columns: &[ColumnCatalog]) -> ColumnId { + // XXX: should we check the column IDs of struct fields here? + columns .iter() - .map(|column| column.column_id().get_id()) - .collect_vec(); - column_ids.sort(); - let original_len = column_ids.len(); - column_ids.dedup(); - column_ids.len() == original_len + .fold(ColumnId::first_user_column(), |a, b| a.max(b.column_id())) } #[cfg(test)] diff --git a/src/common/src/config.rs b/src/common/src/config.rs index 0dc6b48d2d8da..a554e220ec632 100644 --- a/src/common/src/config.rs +++ b/src/common/src/config.rs @@ -755,6 +755,10 @@ pub struct StorageConfig { #[serde(default = "default::storage::compactor_iter_max_io_retry_times")] pub compactor_iter_max_io_retry_times: usize, + /// The window size of table info statistic history. + #[serde(default = "default::storage::table_info_statistic_history_times")] + pub table_info_statistic_history_times: usize, + #[serde(default, flatten)] #[config_doc(omitted)] pub unrecognized: Unrecognized, @@ -977,8 +981,10 @@ pub struct StreamingDeveloperConfig { #[serde(default = "default::developer::stream_enable_arrangement_backfill")] /// Enable arrangement backfill - /// If true, the arrangement backfill will be disabled, + /// If false, the arrangement backfill will be disabled, /// even if session variable set. + /// If true, it will be enabled by default, but session variable + /// can override it. 
pub enable_arrangement_backfill: bool, #[serde(default = "default::developer::stream_high_join_amplification_threshold")] @@ -1045,6 +1051,14 @@ pub struct ObjectStoreConfig { /// Some special configuration of S3 Backend #[serde(default)] pub s3: S3ObjectStoreConfig, + + // TODO: the following field will be deprecated after opendal is stablized + #[serde(default = "default::object_store_config::opendal_upload_concurrency")] + pub opendal_upload_concurrency: usize, + + // TODO: the following field will be deprecated after opendal is stablized + #[serde(default)] + pub opendal_writer_abort_on_err: bool, } impl ObjectStoreConfig { @@ -1103,6 +1117,7 @@ pub struct S3ObjectStoreDeveloperConfig { )] pub retryable_service_error_codes: Vec, + // TODO: the following field will be deprecated after opendal is stablized #[serde(default = "default::object_store_config::s3::developer::use_opendal")] pub use_opendal: bool, } @@ -1565,6 +1580,10 @@ pub mod default { pub fn compactor_concurrent_uploading_sst_count() -> Option { None } + + pub fn table_info_statistic_history_times() -> usize { + 240 + } } pub mod streaming { @@ -2004,6 +2023,10 @@ pub mod default { DEFAULT_REQ_MAX_RETRY_ATTEMPTS } + pub fn opendal_upload_concurrency() -> usize { + 8 + } + pub mod s3 { const DEFAULT_IDENTITY_RESOLUTION_TIMEOUT_S: u64 = 5; diff --git a/src/config/docs.md b/src/config/docs.md index 59c8961a15bea..a52ce9202a3b3 100644 --- a/src/config/docs.md +++ b/src/config/docs.md @@ -138,6 +138,7 @@ This page is automatically generated by `./risedev generate-example-config` | shared_buffer_flush_ratio | The shared buffer will start flushing data to object when the ratio of memory usage to the shared buffer capacity exceed such ratio. | 0.800000011920929 | | shared_buffer_min_batch_flush_size_mb | The minimum total flush size of shared buffer spill. When a shared buffer spilled is trigger, the total flush size across multiple epochs should be at least higher than this size. | 800 | | sstable_id_remote_fetch_number | Number of SST ids fetched from meta per RPC | 10 | +| table_info_statistic_history_times | The window size of table info statistic history. 
| 240 | | write_conflict_detection_enabled | Whether to enable write conflict detection | true | ## streaming diff --git a/src/config/example.toml b/src/config/example.toml index 27bbea13ade15..a708fed3b84bf 100644 --- a/src/config/example.toml +++ b/src/config/example.toml @@ -150,6 +150,7 @@ max_preload_io_retry_times = 3 compactor_fast_max_compact_delete_ratio = 40 compactor_fast_max_compact_task_size = 2147483648 compactor_iter_max_io_retry_times = 8 +table_info_statistic_history_times = 240 mem_table_spill_threshold = 4194304 [storage.cache.block_cache_eviction] @@ -191,6 +192,8 @@ recent_filter_rotate_interval_ms = 10000 [storage.object_store] set_atomic_write_dir = false +opendal_upload_concurrency = 8 +opendal_writer_abort_on_err = false [storage.object_store.retry] req_backoff_interval_ms = 1000 diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index 7cb6f23e5ec7e..bfc283fa6c195 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -15,7 +15,7 @@ normal = ["workspace-hack"] [dependencies] anyhow = "1" -apache-avro = { git = "https://github.com/risingwavelabs/avro", rev = "5349b0c7b35940d117397edbd314ca9087cdb892", features = [ +apache-avro = { git = "https://github.com/risingwavelabs/avro", rev = "25113ba88234a9ae23296e981d8302c290fdaa4b", features = [ "snappy", "zstandard", "bzip", @@ -68,7 +68,7 @@ google-cloud-googleapis = { version = "0.13", features = ["pubsub", "bigquery"] google-cloud-pubsub = "0.25" http = "0.2" icelake = { workspace = true } -indexmap = { version = "1.9.3", features = ["serde"] } +indexmap = { version = "2.2.6", features = ["serde"] } itertools = { workspace = true } jni = { version = "0.21.1", features = ["invocation"] } jsonbb = { workspace = true } @@ -76,6 +76,7 @@ jsonwebtoken = "9.2.0" jst = { package = 'jsonschema-transpiler', git = "https://github.com/mozilla/jsonschema-transpiler", rev = "c1a89d720d118843d8bcca51084deb0ed223e4b4" } maplit = "1.0.2" moka = { version = "0.12", features = ["future"] } +mongodb = { version = "2.8.2", features = ["tokio-runtime"] } mysql_async = { version = "0.34", default-features = false, features = [ "default", ] } diff --git a/src/connector/codec/Cargo.toml b/src/connector/codec/Cargo.toml index ef12b325d446d..172aacb1c53f3 100644 --- a/src/connector/codec/Cargo.toml +++ b/src/connector/codec/Cargo.toml @@ -16,7 +16,7 @@ normal = ["workspace-hack"] [dependencies] anyhow = "1" -apache-avro = { git = "https://github.com/risingwavelabs/avro", rev = "5349b0c7b35940d117397edbd314ca9087cdb892", features = [ +apache-avro = { git = "https://github.com/risingwavelabs/avro", rev = "25113ba88234a9ae23296e981d8302c290fdaa4b", features = [ "snappy", "zstandard", "bzip", diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index dfda61f6ce578..92bb7d9c30677 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -744,3 +744,24 @@ pub(crate) fn load_private_key( .ok_or_else(|| anyhow!("No private key found"))?; Ok(cert?.into()) } + +#[serde_as] +#[derive(Deserialize, Debug, Clone, WithOptions)] +pub struct MongodbCommon { + /// The URL of MongoDB + #[serde(rename = "mongodb.url")] + pub connect_uri: String, + /// The collection name where data should be written to or read from. For sinks, the format is + /// `db_name.collection_name`. Data can also be written to dynamic collections, see `collection.name.field` + /// for more information. 
+ #[serde(rename = "collection.name")] + pub collection_name: String, +} + +impl MongodbCommon { + pub(crate) async fn build_client(&self) -> ConnectorResult { + let client = mongodb::Client::with_uri_str(&self.connect_uri).await?; + + Ok(client) + } +} diff --git a/src/connector/src/connector_common/mod.rs b/src/connector/src/connector_common/mod.rs index 7a6254c8cde93..4ec36ba78e0be 100644 --- a/src/connector/src/connector_common/mod.rs +++ b/src/connector/src/connector_common/mod.rs @@ -18,6 +18,6 @@ pub use mqtt_common::{MqttCommon, QualityOfService as MqttQualityOfService}; pub mod common; pub use common::{ AwsAuthProps, AwsPrivateLinkItem, KafkaCommon, KafkaPrivateLinkCommon, KinesisCommon, - NatsCommon, PulsarCommon, PulsarOauthCommon, RdKafkaPropertiesCommon, + MongodbCommon, NatsCommon, PulsarCommon, PulsarOauthCommon, RdKafkaPropertiesCommon, PRIVATE_LINK_BROKER_REWRITE_MAP_KEY, PRIVATE_LINK_TARGETS_KEY, }; diff --git a/src/connector/src/error.rs b/src/connector/src/error.rs index ab4b3e7bc37b5..376cdb808216f 100644 --- a/src/connector/src/error.rs +++ b/src/connector/src/error.rs @@ -63,6 +63,7 @@ def_anyhow_newtype! { rumqttc::tokio_rustls::rustls::Error => "TLS error", rumqttc::v5::ClientError => "MQTT error", rumqttc::v5::OptionError => "MQTT error", + mongodb::error::Error => "Mongodb error", openssl::error::ErrorStack => "OpenSSL error", } diff --git a/src/connector/src/parser/additional_columns.rs b/src/connector/src/parser/additional_columns.rs index 253718a00a7df..f50429f716073 100644 --- a/src/connector/src/parser/additional_columns.rs +++ b/src/connector/src/parser/additional_columns.rs @@ -16,7 +16,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::LazyLock; use risingwave_common::bail; -use risingwave_common::catalog::{ColumnCatalog, ColumnDesc, ColumnId}; +use risingwave_common::catalog::{max_column_id, ColumnCatalog, ColumnDesc, ColumnId}; use risingwave_common::types::{DataType, StructType}; use risingwave_pb::data::data_type::TypeName; use risingwave_pb::data::DataType as PbDataType; @@ -280,11 +280,7 @@ pub fn source_add_partition_offset_cols( connector_name: &str, ) -> ([bool; 2], [ColumnCatalog; 2]) { let mut columns_exist = [false; 2]; - let mut last_column_id = columns - .iter() - .map(|c| c.column_desc.column_id) - .max() - .unwrap_or(ColumnId::placeholder()); + let mut last_column_id = max_column_id(columns); let additional_columns: Vec<_> = { let compat_col_types = COMPATIBLE_ADDITIONAL_COLUMNS diff --git a/src/connector/src/parser/mod.rs b/src/connector/src/parser/mod.rs index f7667a66a3747..26cf746b535dc 100644 --- a/src/connector/src/parser/mod.rs +++ b/src/connector/src/parser/mod.rs @@ -1028,6 +1028,7 @@ pub mod test_utils { } } +/// Note: this is created in `SourceReader::build_stream` #[derive(Debug, Clone, Default)] pub struct ParserConfig { pub common: CommonParserConfig, diff --git a/src/connector/src/sink/big_query.rs b/src/connector/src/sink/big_query.rs index b2376246a52f7..c89e200093473 100644 --- a/src/connector/src/sink/big_query.rs +++ b/src/connector/src/sink/big_query.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
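For reference, the `max_column_id` helper introduced in `column.rs` above (and used by `source_add_partition_offset_cols`) is a fold with `ColumnId::first_user_column()` as the floor, so callers can always allocate "max + 1" for a new column. A minimal sketch of that behavior with simplified stand-in types (the real floor is `USER_COLUMN_ID_OFFSET`; the value 1 below is illustrative):

```rust
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct ColumnId(i32);

impl ColumnId {
    const fn first_user_column() -> Self {
        // Stand-in for USER_COLUMN_ID_OFFSET in the real catalog.
        ColumnId(1)
    }
    fn next(self) -> Self {
        ColumnId(self.0 + 1)
    }
}

fn max_column_id(columns: &[ColumnId]) -> ColumnId {
    columns
        .iter()
        .fold(ColumnId::first_user_column(), |a, &b| a.max(b))
}

fn main() {
    // With no user columns yet, the next id still starts after the reserved range.
    assert_eq!(max_column_id(&[]).next(), ColumnId(2));
    // Otherwise it is one past the largest id in use.
    assert_eq!(max_column_id(&[ColumnId(1), ColumnId(5)]).next(), ColumnId(6));
}
```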
-use core::mem; use core::time::Duration; use std::collections::{BTreeMap, HashMap}; use std::sync::Arc; @@ -81,12 +80,19 @@ pub struct BigQueryCommon { #[serde(rename = "bigquery.max_batch_rows", default = "default_max_batch_rows")] #[serde_as(as = "DisplayFromStr")] pub max_batch_rows: usize, + #[serde(rename = "bigquery.retry_times", default = "default_retry_times")] + #[serde_as(as = "DisplayFromStr")] + pub retry_times: usize, } fn default_max_batch_rows() -> usize { 1024 } +fn default_retry_times() -> usize { + 5 +} + impl BigQueryCommon { async fn build_client(&self, aws_auth_props: &AwsAuthProps) -> Result { let auth_json = self.get_auth_json_from_path(aws_auth_props).await?; @@ -469,12 +475,25 @@ impl BigQuerySinkWriter { if self.write_rows.is_empty() { return Ok(()); } - let rows = mem::take(&mut self.write_rows); - self.write_rows_count = 0; - self.client - .append_rows(rows, self.write_stream.clone()) - .await?; - Ok(()) + let mut errs = Vec::with_capacity(self.config.common.retry_times); + for _ in 0..self.config.common.retry_times { + match self + .client + .append_rows(self.write_rows.clone(), self.write_stream.clone()) + .await + { + Ok(_) => { + self.write_rows_count = 0; + self.write_rows.clear(); + return Ok(()); + } + Err(e) => errs.push(e), + } + } + Err(SinkError::BigQuery(anyhow::anyhow!( + "Insert error {:?}", + errs + ))) } } diff --git a/src/connector/src/sink/catalog/desc.rs b/src/connector/src/sink/catalog/desc.rs index d1afa1a7a31d2..2a36e3e0b1406 100644 --- a/src/connector/src/sink/catalog/desc.rs +++ b/src/connector/src/sink/catalog/desc.rs @@ -19,7 +19,7 @@ use risingwave_common::catalog::{ ColumnCatalog, ConnectionId, CreateType, DatabaseId, SchemaId, TableId, UserId, }; use risingwave_common::util::sort_util::ColumnOrder; -use risingwave_pb::catalog::PbSecretRef; +use risingwave_pb::secret::PbSecretRef; use risingwave_pb::stream_plan::PbSinkDesc; use super::{SinkCatalog, SinkFormatDesc, SinkId, SinkType}; diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs index 72a2c6ea788c5..0673a571573e6 100644 --- a/src/connector/src/sink/catalog/mod.rs +++ b/src/connector/src/sink/catalog/mod.rs @@ -25,8 +25,9 @@ use risingwave_common::catalog::{ use risingwave_common::util::epoch::Epoch; use risingwave_common::util::sort_util::ColumnOrder; use risingwave_pb::catalog::{ - PbCreateType, PbSecretRef, PbSink, PbSinkFormatDesc, PbSinkType, PbStreamJobStatus, + PbCreateType, PbSink, PbSinkFormatDesc, PbSinkType, PbStreamJobStatus, }; +use risingwave_pb::secret::PbSecretRef; use super::{ SinkError, CONNECTOR_TYPE_KEY, SINK_TYPE_APPEND_ONLY, SINK_TYPE_DEBEZIUM, SINK_TYPE_OPTION, diff --git a/src/connector/src/sink/clickhouse.rs b/src/connector/src/sink/clickhouse.rs index c506f00e6d2ca..8af58f668a440 100644 --- a/src/connector/src/sink/clickhouse.rs +++ b/src/connector/src/sink/clickhouse.rs @@ -61,20 +61,22 @@ pub struct ClickHouseCommon { pub database: String, #[serde(rename = "clickhouse.table")] pub table: String, + #[serde(rename = "clickhouse.delete.column")] + pub delete_column: Option, } #[allow(clippy::enum_variant_names)] #[derive(Debug)] enum ClickHouseEngine { MergeTree, - ReplacingMergeTree, + ReplacingMergeTree(Option), SummingMergeTree, AggregatingMergeTree, CollapsingMergeTree(String), VersionedCollapsingMergeTree(String), GraphiteMergeTree, ReplicatedMergeTree, - ReplicatedReplacingMergeTree, + ReplicatedReplacingMergeTree(Option), ReplicatedSummingMergeTree, ReplicatedAggregatingMergeTree, #[expect(dead_code)] @@ -94,6 
+96,24 @@ impl ClickHouseEngine { ) } + pub fn is_delete_replacing_engine(&self) -> bool { + match self { + ClickHouseEngine::ReplacingMergeTree(delete_col) => delete_col.is_some(), + ClickHouseEngine::ReplicatedReplacingMergeTree(delete_col) => delete_col.is_some(), + _ => false, + } + } + + pub fn get_delete_col(&self) -> Option { + match self { + ClickHouseEngine::ReplacingMergeTree(Some(delete_col)) => Some(delete_col.to_string()), + ClickHouseEngine::ReplicatedReplacingMergeTree(Some(delete_col)) => { + Some(delete_col.to_string()) + } + _ => None, + } + } + pub fn get_sign_name(&self) -> Option { match self { ClickHouseEngine::CollapsingMergeTree(sign_name) => Some(sign_name.to_string()), @@ -110,10 +130,16 @@ impl ClickHouseEngine { } } - pub fn from_query_engine(engine_name: &ClickhouseQueryEngine) -> Result { + pub fn from_query_engine( + engine_name: &ClickhouseQueryEngine, + config: &ClickHouseConfig, + ) -> Result { match engine_name.engine.as_str() { "MergeTree" => Ok(ClickHouseEngine::MergeTree), - "ReplacingMergeTree" => Ok(ClickHouseEngine::ReplacingMergeTree), + "ReplacingMergeTree" => { + let delete_column = config.common.delete_column.clone(); + Ok(ClickHouseEngine::ReplacingMergeTree(delete_column)) + } "SummingMergeTree" => Ok(ClickHouseEngine::SummingMergeTree), "AggregatingMergeTree" => Ok(ClickHouseEngine::AggregatingMergeTree), // VersionedCollapsingMergeTree(sign_name,"a") @@ -146,7 +172,12 @@ impl ClickHouseEngine { } "GraphiteMergeTree" => Ok(ClickHouseEngine::GraphiteMergeTree), "ReplicatedMergeTree" => Ok(ClickHouseEngine::ReplicatedMergeTree), - "ReplicatedReplacingMergeTree" => Ok(ClickHouseEngine::ReplicatedReplacingMergeTree), + "ReplicatedReplacingMergeTree" => { + let delete_column = config.common.delete_column.clone(); + Ok(ClickHouseEngine::ReplicatedReplacingMergeTree( + delete_column, + )) + } "ReplicatedSummingMergeTree" => Ok(ClickHouseEngine::ReplicatedSummingMergeTree), "ReplicatedAggregatingMergeTree" => { Ok(ClickHouseEngine::ReplicatedAggregatingMergeTree) @@ -262,7 +293,7 @@ impl ClickHouseSink { .collect(); if rw_fields_name.len().gt(&clickhouse_columns_desc.len()) { - return Err(SinkError::ClickHouse("The nums of the RisingWave column must be greater than/equal to the length of the Clickhouse column".to_string())); + return Err(SinkError::ClickHouse("The columns of the sink must be equal to or a superset of the target table's columns.".to_string())); } for i in rw_fields_name { @@ -399,9 +430,16 @@ impl Sink for ClickHouseSink { let (clickhouse_column, clickhouse_engine) = query_column_engine_from_ck(client, &self.config).await?; - if !self.is_append_only && !clickhouse_engine.is_collapsing_engine() { - return Err(SinkError::ClickHouse( - "If you want to use upsert, please modify your engine is `VersionedCollapsingMergeTree` or `CollapsingMergeTree` in ClickHouse".to_owned())); + if !self.is_append_only + && !clickhouse_engine.is_collapsing_engine() + && !clickhouse_engine.is_delete_replacing_engine() + { + return match clickhouse_engine { + ClickHouseEngine::ReplicatedReplacingMergeTree(None) | ClickHouseEngine::ReplacingMergeTree(None) => { + Err(SinkError::ClickHouse("To enable upsert with a `ReplacingMergeTree`, you must set a `clickhouse.delete.column` to the UInt8 column in ClickHouse used to signify deletes. 
See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replacingmergetree#is_deleted for more information".to_owned())) + } + _ => Err(SinkError::ClickHouse("If you want to use upsert, please use either `VersionedCollapsingMergeTree`, `CollapsingMergeTree` or the `ReplacingMergeTree` in ClickHouse".to_owned())) + }; } self.check_column_name_and_type(&clickhouse_column)?; @@ -470,6 +508,9 @@ impl ClickHouseSinkWriter { if let Some(sign) = clickhouse_engine.get_sign_name() { rw_fields_name_after_calibration.push(sign); } + if let Some(delete_col) = clickhouse_engine.get_delete_col() { + rw_fields_name_after_calibration.push(delete_col); + } Ok(Self { config, schema, @@ -562,21 +603,35 @@ impl ClickHouseSinkWriter { } match op { Op::Insert | Op::UpdateInsert => { - if self.clickhouse_engine.get_sign_name().is_some() { + if self.clickhouse_engine.is_collapsing_engine() { clickhouse_filed_vec.push(ClickHouseFieldWithNull::WithoutSome( ClickHouseField::Int8(1), )); } + if self.clickhouse_engine.is_delete_replacing_engine() { + clickhouse_filed_vec.push(ClickHouseFieldWithNull::WithoutSome( + ClickHouseField::Int8(0), + )) + } } Op::Delete | Op::UpdateDelete => { - if !self.clickhouse_engine.is_collapsing_engine() { + if !self.clickhouse_engine.is_collapsing_engine() + && !self.clickhouse_engine.is_delete_replacing_engine() + { return Err(SinkError::ClickHouse( "Clickhouse engine don't support upsert".to_string(), )); } - clickhouse_filed_vec.push(ClickHouseFieldWithNull::WithoutSome( - ClickHouseField::Int8(-1), - )) + if self.clickhouse_engine.is_collapsing_engine() { + clickhouse_filed_vec.push(ClickHouseFieldWithNull::WithoutSome( + ClickHouseField::Int8(-1), + )); + } + if self.clickhouse_engine.is_delete_replacing_engine() { + clickhouse_filed_vec.push(ClickHouseFieldWithNull::WithoutSome( + ClickHouseField::Int8(1), + )) + } } } let clickhouse_column = ClickHouseColumn { @@ -654,11 +709,16 @@ async fn query_column_engine_from_ck( } let clickhouse_engine = - ClickHouseEngine::from_query_engine(clickhouse_engine.first().unwrap())?; + ClickHouseEngine::from_query_engine(clickhouse_engine.first().unwrap(), config)?; if let Some(sign) = &clickhouse_engine.get_sign_name() { clickhouse_column.retain(|a| sign.ne(&a.name)) } + + if let Some(delete_col) = &clickhouse_engine.get_delete_col() { + clickhouse_column.retain(|a| delete_col.ne(&a.name)) + } + Ok((clickhouse_column, clickhouse_engine)) } diff --git a/src/connector/src/sink/doris.rs b/src/connector/src/sink/doris.rs index 643ad9e3b7b3b..f8c7b374d11ce 100644 --- a/src/connector/src/sink/doris.rs +++ b/src/connector/src/sink/doris.rs @@ -54,6 +54,8 @@ pub struct DorisCommon { pub database: String, #[serde(rename = "doris.table")] pub table: String, + #[serde(rename = "doris.partial_update")] + pub partial_update: Option, } impl DorisCommon { @@ -125,8 +127,11 @@ impl DorisSink { .collect(); let rw_fields_name = self.schema.fields(); - if rw_fields_name.len().ne(&doris_columns_desc.len()) { - return Err(SinkError::Doris("The length of the RisingWave column must be equal to the length of the doris column".to_string())); + if rw_fields_name.len() > doris_columns_desc.len() { + return Err(SinkError::Doris( + "The columns of the sink must be equal to or a superset of the target table's columns." 
+ .to_string(), + )); } for i in rw_fields_name { @@ -273,6 +278,7 @@ impl DorisSinkWriter { .add_common_header() .set_user_password(config.common.user.clone(), config.common.password.clone()) .add_json_format() + .set_partial_columns(config.common.partial_update.clone()) .add_read_json_by_line(); let header = if !is_append_only { header_builder.add_hidden_column().build() diff --git a/src/connector/src/sink/doris_starrocks_connector.rs b/src/connector/src/sink/doris_starrocks_connector.rs index 3173b64389f22..fb0a37572710e 100644 --- a/src/connector/src/sink/doris_starrocks_connector.rs +++ b/src/connector/src/sink/doris_starrocks_connector.rs @@ -142,6 +142,7 @@ impl HeaderBuilder { self } + /// Only use in Starrocks pub fn set_partial_update(mut self, partial_update: Option) -> Self { self.header.insert( "partial_update".to_string(), @@ -150,6 +151,15 @@ impl HeaderBuilder { self } + /// Only use in Doris + pub fn set_partial_columns(mut self, partial_columns: Option) -> Self { + self.header.insert( + "partial_columns".to_string(), + partial_columns.unwrap_or_else(|| "false".to_string()), + ); + self + } + /// Only used in Starrocks Transaction API pub fn set_db(mut self, db: String) -> Self { self.header.insert("db".to_string(), db); diff --git a/src/connector/src/sink/elasticsearch.rs b/src/connector/src/sink/elasticsearch.rs index 236f90823c505..3d51e48201c94 100644 --- a/src/connector/src/sink/elasticsearch.rs +++ b/src/connector/src/sink/elasticsearch.rs @@ -24,7 +24,7 @@ use risingwave_common::types::{JsonbVal, Scalar, ToText}; use serde_json::Value; use super::encoder::{JsonEncoder, RowEncoder}; -use super::remote::ElasticSearchSink; +use super::remote::{ElasticSearchSink, OpensearchSink}; use crate::sink::{Result, Sink}; pub const ES_OPTION_DELIMITER: &str = "delimiter"; pub const ES_OPTION_INDEX_COLUMN: &str = "index_column"; @@ -40,7 +40,7 @@ impl StreamChunkConverter { pk_indices: &Vec, properties: &BTreeMap, ) -> Result { - if sink_name == ElasticSearchSink::SINK_NAME { + if is_es_sink(sink_name) { let index_column = properties .get(ES_OPTION_INDEX_COLUMN) .cloned() @@ -170,3 +170,7 @@ impl EsStreamChunkConverter { (self.fn_build_id)(row) } } + +pub fn is_es_sink(sink_name: &str) -> bool { + sink_name == ElasticSearchSink::SINK_NAME || sink_name == OpensearchSink::SINK_NAME +} diff --git a/src/connector/src/sink/encoder/bson.rs b/src/connector/src/sink/encoder/bson.rs new file mode 100644 index 0000000000000..c401d0575a12b --- /dev/null +++ b/src/connector/src/sink/encoder/bson.rs @@ -0,0 +1,203 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
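The ClickHouse changes above extend the op-to-hidden-column mapping: collapsing engines keep the +1/-1 sign column, while a `ReplacingMergeTree` configured with `clickhouse.delete.column` gets an is_deleted flag of 0 on insert and 1 on delete. A simplified sketch of that mapping (types are illustrative, not the sink's real ones):

```rust
#[derive(Clone, Copy)]
enum Op {
    Insert,
    Delete,
}

#[derive(Clone, Copy)]
enum Engine {
    Collapsing,
    ReplacingWithDeleteColumn,
}

/// Value appended to the hidden sign / is_deleted column for one row.
fn extra_column_value(engine: Engine, op: Op) -> i8 {
    match (engine, op) {
        (Engine::Collapsing, Op::Insert) => 1,
        (Engine::Collapsing, Op::Delete) => -1,
        (Engine::ReplacingWithDeleteColumn, Op::Insert) => 0,
        (Engine::ReplacingWithDeleteColumn, Op::Delete) => 1,
    }
}

fn main() {
    assert_eq!(extra_column_value(Engine::Collapsing, Op::Delete), -1);
    assert_eq!(extra_column_value(Engine::ReplacingWithDeleteColumn, Op::Delete), 1);
}
```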
+ +use std::sync::LazyLock; + +use anyhow::anyhow; +use mongodb::bson::spec::BinarySubtype; +use mongodb::bson::{Binary, Bson, DateTime, Document}; +use risingwave_common::array::RowRef; +use risingwave_common::catalog::{Field, Schema}; +use risingwave_common::log::LogSuppresser; +use risingwave_common::row::Row; +use risingwave_common::types::{DataType, DatumRef, JsonbVal, ScalarRefImpl}; +use risingwave_common::util::iter_util::ZipEqDebug; +use thiserror_ext::AsReport; + +use super::{Result as SinkResult, RowEncoder, SerTo}; +use crate::sink::SinkError; + +static LOG_SUPPERSSER: LazyLock = LazyLock::new(LogSuppresser::default); + +pub struct BsonEncoder { + schema: Schema, + col_indices: Option>, + pk_indices: Vec, +} + +impl BsonEncoder { + pub fn new(schema: Schema, col_indices: Option>, pk_indices: Vec) -> Self { + Self { + schema, + col_indices, + pk_indices, + } + } + + pub fn construct_pk(&self, row: RowRef<'_>) -> Bson { + if self.pk_indices.len() == 1 { + let pk_field = &self.schema.fields[self.pk_indices[0]]; + let pk_datum = row.datum_at(self.pk_indices[0]); + datum_to_bson(pk_field, pk_datum) + } else { + self.pk_indices + .iter() + .map(|&idx| { + let pk_field = &self.schema.fields[idx]; + ( + pk_field.name.clone(), + datum_to_bson(pk_field, row.datum_at(idx)), + ) + }) + .collect::() + .into() + } + } +} + +impl SerTo> for Document { + fn ser_to(self) -> SinkResult> { + mongodb::bson::to_vec(&self).map_err(|err| { + SinkError::Mongodb(anyhow!(err).context("cannot serialize Document to Vec")) + }) + } +} + +impl RowEncoder for BsonEncoder { + type Output = Document; + + fn encode_cols( + &self, + row: impl Row, + col_indices: impl Iterator, + ) -> SinkResult { + Ok(col_indices + .map(|idx| (&self.schema.fields[idx], row.datum_at(idx))) + .map(|(field, datum)| (field.name.clone(), datum_to_bson(field, datum))) + .collect()) + } + + fn schema(&self) -> &Schema { + &self.schema + } + + fn col_indices(&self) -> Option<&[usize]> { + self.col_indices.as_ref().map(Vec::as_ref) + } +} + +/// We support converting all types to `MongoDB`. If there is an unmatched type, it will be +/// converted to its string representation. 
If there is a conversion error, a warning log is printed +/// and a `Bson::Null` is returned +fn datum_to_bson(field: &Field, datum: DatumRef<'_>) -> Bson { + let scalar_ref = match datum { + None => { + return Bson::Null; + } + Some(datum) => datum, + }; + + let data_type = field.data_type(); + + match (data_type, scalar_ref) { + (DataType::Int16, ScalarRefImpl::Int16(v)) => Bson::Int32(v as i32), + (DataType::Int32, ScalarRefImpl::Int32(v)) => Bson::Int32(v), + (DataType::Int64, ScalarRefImpl::Int64(v)) => Bson::Int64(v), + (DataType::Int256, ScalarRefImpl::Int256(v)) => Bson::String(v.to_string()), + (DataType::Float32, ScalarRefImpl::Float32(v)) => Bson::Double(v.into_inner() as f64), + (DataType::Float64, ScalarRefImpl::Float64(v)) => Bson::Double(v.into_inner()), + (DataType::Varchar, ScalarRefImpl::Utf8(v)) => Bson::String(v.to_string()), + (DataType::Boolean, ScalarRefImpl::Bool(v)) => Bson::Boolean(v), + (DataType::Decimal, ScalarRefImpl::Decimal(v)) => { + let decimal_str = v.to_string(); + let converted = decimal_str.parse(); + match converted { + Ok(v) => Bson::Decimal128(v), + Err(err) => { + if let Ok(suppressed_count) = LOG_SUPPERSSER.check() { + tracing::warn!( + suppressed_count, + error = %err.as_report(), + ?field, + "risingwave decimal {} convert to bson decimal128 failed", + decimal_str, + ); + } + Bson::Null + } + } + } + (DataType::Interval, ScalarRefImpl::Interval(v)) => Bson::String(v.to_string()), + (DataType::Date, ScalarRefImpl::Date(v)) => Bson::String(v.to_string()), + (DataType::Time, ScalarRefImpl::Time(v)) => Bson::String(v.to_string()), + (DataType::Timestamp, ScalarRefImpl::Timestamp(v)) => { + Bson::DateTime(DateTime::from_millis(v.0.and_utc().timestamp_millis())) + } + (DataType::Timestamptz, ScalarRefImpl::Timestamptz(v)) => { + Bson::DateTime(DateTime::from_millis(v.timestamp_millis())) + } + (DataType::Jsonb, ScalarRefImpl::Jsonb(v)) => { + let jsonb_val: JsonbVal = v.into(); + match jsonb_val.take().try_into() { + Ok(doc) => doc, + Err(err) => { + if let Ok(suppressed_count) = LOG_SUPPERSSER.check() { + tracing::warn!( + suppressed_count, + error = %err.as_report(), + ?field, + "convert jsonb to mongodb bson failed", + ); + } + Bson::Null + } + } + } + (DataType::Serial, ScalarRefImpl::Serial(v)) => Bson::Int64(v.into_inner()), + (DataType::Struct(st), ScalarRefImpl::Struct(struct_ref)) => { + let mut doc = Document::new(); + for (sub_datum_ref, sub_field) in struct_ref.iter_fields_ref().zip_eq_debug( + st.iter() + .map(|(name, dt)| Field::with_name(dt.clone(), name)), + ) { + doc.insert( + sub_field.name.clone(), + datum_to_bson(&sub_field, sub_datum_ref), + ); + } + Bson::Document(doc) + } + (DataType::List(dt), ScalarRefImpl::List(v)) => { + let inner_field = Field::unnamed(Box::::into_inner(dt)); + v.iter() + .map(|scalar_ref| datum_to_bson(&inner_field, scalar_ref)) + .collect::() + } + (DataType::Bytea, ScalarRefImpl::Bytea(v)) => Bson::Binary(Binary { + subtype: BinarySubtype::Generic, + bytes: v.into(), + }), + _ => { + if let Ok(suppressed_count) = LOG_SUPPERSSER.check() { + tracing::warn!( + suppressed_count, + ?field, + ?scalar_ref, + "datum_to_bson: unsupported data type" + ); + } + Bson::Null + } + } +} diff --git a/src/connector/src/sink/encoder/mod.rs b/src/connector/src/sink/encoder/mod.rs index 40d85625c5b1d..889d0162784bb 100644 --- a/src/connector/src/sink/encoder/mod.rs +++ b/src/connector/src/sink/encoder/mod.rs @@ -21,12 +21,14 @@ use risingwave_common::row::Row; use crate::sink::Result; mod avro; +mod bson; mod json; mod 
proto; pub mod template; pub mod text; pub use avro::{AvroEncoder, AvroHeader}; +pub use bson::BsonEncoder; pub use json::JsonEncoder; pub use proto::{ProtoEncoder, ProtoHeader}; diff --git a/src/connector/src/sink/mod.rs b/src/connector/src/sink/mod.rs index 8102f03355e8f..097c0e57b15c1 100644 --- a/src/connector/src/sink/mod.rs +++ b/src/connector/src/sink/mod.rs @@ -31,6 +31,7 @@ pub mod kafka; pub mod kinesis; pub mod log_store; pub mod mock_coordination_client; +pub mod mongodb; pub mod mqtt; pub mod nats; pub mod pulsar; @@ -93,6 +94,7 @@ macro_rules! for_all_sinks { { Nats, $crate::sink::nats::NatsSink }, { Jdbc, $crate::sink::remote::JdbcSink }, { ElasticSearch, $crate::sink::remote::ElasticSearchSink }, + { Opensearch, $crate::sink::remote::OpensearchSink }, { Cassandra, $crate::sink::remote::CassandraSink }, { HttpJava, $crate::sink::remote::HttpJavaSink }, { Doris, $crate::sink::doris::DorisSink }, @@ -101,6 +103,7 @@ macro_rules! for_all_sinks { { DeltaLake, $crate::sink::deltalake::DeltaLakeSink }, { BigQuery, $crate::sink::big_query::BigQuerySink }, { DynamoDb, $crate::sink::dynamodb::DynamoDbSink }, + { Mongodb, $crate::sink::mongodb::MongodbSink }, { SqlServer, $crate::sink::sqlserver::SqlServerSink }, { Test, $crate::sink::test_sink::TestSink }, { Table, $crate::sink::trivial::TableSink } @@ -595,6 +598,12 @@ pub enum SinkError { #[backtrace] ConnectorError, ), + #[error("Mongodb error: {0}")] + Mongodb( + #[source] + #[backtrace] + anyhow::Error, + ), } impl From for SinkError { diff --git a/src/connector/src/sink/mongodb.rs b/src/connector/src/sink/mongodb.rs new file mode 100644 index 0000000000000..8840c72176960 --- /dev/null +++ b/src/connector/src/sink/mongodb.rs @@ -0,0 +1,762 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
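A small sketch of the `_id` shape produced by the `BsonEncoder::construct_pk` logic above, using the `mongodb` crate's bson types: a single-column key becomes a scalar `_id`, while a compound key is folded into an embedded document. The helper below is a simplified stand-in, not the sink's actual code:

```rust
use mongodb::bson::{doc, Bson, Document};

// Stand-in for BsonEncoder::construct_pk: `pk` carries
// (field name, already-encoded value) pairs of the sink's primary key.
fn construct_pk(pk: &[(&str, Bson)]) -> Bson {
    if pk.len() == 1 {
        pk[0].1.clone()
    } else {
        pk.iter()
            .map(|(name, value)| (name.to_string(), value.clone()))
            .collect::<Document>()
            .into()
    }
}

fn main() {
    // Single-column key (a) = 1     ->  { "_id": 1 }
    let single = doc! { "_id": construct_pk(&[("a", Bson::Int32(1))]) };
    // Compound key (a, b) = (1, 2)  ->  { "_id": { "a": 1, "b": 2 } }
    let compound = doc! { "_id": construct_pk(&[("a", Bson::Int32(1)), ("b", Bson::Int32(2))]) };
    println!("{single}\n{compound}");
}
```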
+ +use std::collections::{BTreeMap, HashMap}; +use std::ops::Deref; +use std::sync::LazyLock; + +use anyhow::anyhow; +use itertools::Itertools; +use mongodb::bson::{bson, doc, Array, Bson, Document}; +use mongodb::{Client, Namespace}; +use risingwave_common::array::{Op, RowRef, StreamChunk}; +use risingwave_common::catalog::Schema; +use risingwave_common::log::LogSuppresser; +use risingwave_common::must_match; +use risingwave_common::row::Row; +use risingwave_common::session_config::sink_decouple::SinkDecouple; +use risingwave_common::types::ScalarRefImpl; +use serde_derive::Deserialize; +use serde_with::{serde_as, DisplayFromStr}; +use thiserror_ext::AsReport; +use tonic::async_trait; +use with_options::WithOptions; + +use super::catalog::desc::SinkDesc; +use super::encoder::BsonEncoder; +use crate::connector_common::MongodbCommon; +use crate::deserialize_bool_from_string; +use crate::sink::encoder::RowEncoder; +use crate::sink::writer::{LogSinkerOf, SinkWriter, SinkWriterExt}; +use crate::sink::{ + DummySinkCommitCoordinator, Result, Sink, SinkError, SinkParam, SinkWriterParam, + SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT, +}; + +pub const MONGODB_SINK: &str = "mongodb"; + +// 65536 seems like a reasonable limit, but we may consider setting this limit to 100,000, +// which is the actual limit imposed by the server. +// see https://www.mongodb.com/docs/v4.2/reference/command/hello/#hello.maxWriteBatchSize for more details +pub const MONGODB_BULK_WRITE_SIZE_LIMIT: usize = 65536; +pub const MONGODB_PK_NAME: &str = "_id"; + +static LOG_SUPPERSSER: LazyLock = LazyLock::new(LogSuppresser::default); + +const fn _default_bulk_write_max_entries() -> usize { + 1024 +} + +#[serde_as] +#[derive(Clone, Debug, Deserialize, WithOptions)] +pub struct MongodbConfig { + #[serde(flatten)] + pub common: MongodbCommon, + + pub r#type: String, // accept "append-only" or "upsert" + + /// The dynamic collection name where data should be sunk to. If specified, the field value will be used + /// as the collection name. The collection name format is same as `collection.name`. If the field value is + /// null or an empty string, then the `collection.name` will be used as a fallback destination. + #[serde(rename = "collection.name.field")] + pub collection_name_field: Option, + + /// Controls whether the field value of `collection.name.field` should be dropped when sinking. + /// Set this option to true to avoid the duplicate values of `collection.name.field` being written to the + /// result collection. + #[serde( + default, + deserialize_with = "deserialize_bool_from_string", + rename = "collection.name.field.drop" + )] + pub drop_collection_name_field: bool, + + /// The maximum entries will accumulate before performing the bulk write, defaults to 1024. + #[serde( + rename = "mongodb.bulk_write.max_entries", + default = "_default_bulk_write_max_entries" + )] + #[serde_as(as = "DisplayFromStr")] + pub bulk_write_max_entries: usize, +} + +impl MongodbConfig { + pub fn from_btreemap(properties: BTreeMap) -> crate::sink::Result { + let config = + serde_json::from_value::(serde_json::to_value(properties).unwrap()) + .map_err(|e| SinkError::Config(anyhow!(e)))?; + if config.r#type != SINK_TYPE_APPEND_ONLY && config.r#type != SINK_TYPE_UPSERT { + return Err(SinkError::Config(anyhow!( + "`{}` must be {}, or {}", + SINK_TYPE_OPTION, + SINK_TYPE_APPEND_ONLY, + SINK_TYPE_UPSERT + ))); + } + Ok(config) + } +} + +/// An async-drop style `Guard` for `mongodb::Client`. 
Use this guard to hold a client, +/// the `client::shutdown` is called in an async manner when the guard is dropped. +/// Please be aware this is a "best effort" style shutdown, which may not be successful if the +/// tokio runtime is in the process of terminating. However, the server-side resources will be +/// cleaned up eventually due to the session expiration. +/// see [this issue](https://github.com/mongodb/mongo-rust-driver/issues/719) for more information +struct ClientGuard { + _tx: tokio::sync::oneshot::Sender<()>, + client: Client, +} + +impl ClientGuard { + fn new(name: String, client: Client) -> Self { + let client_copy = client.clone(); + let (_tx, rx) = tokio::sync::oneshot::channel::<()>(); + tokio::spawn(async move { + tracing::debug!(%name, "waiting for client to shut down"); + let _ = rx.await; + tracing::debug!(%name, "sender dropped now calling client's shutdown"); + // shutdown may stuck if the resources created by client are not dropped at this point. + // As recommended by [shutdown](https://docs.rs/mongodb/2.8.2/mongodb/struct.Client.html#method.shutdown) + // documentation, we should make our resources usage shorter-lived than the client. So if this happens, + // there are some programming error in our code. + client_copy.shutdown().await; + tracing::debug!(%name, "client shutdown succeeded"); + }); + Self { _tx, client } + } +} + +impl Deref for ClientGuard { + type Target = Client; + + fn deref(&self) -> &Self::Target { + &self.client + } +} + +#[derive(Debug)] +pub struct MongodbSink { + pub config: MongodbConfig, + param: SinkParam, + schema: Schema, + pk_indices: Vec, + is_append_only: bool, +} + +impl MongodbSink { + pub fn new(param: SinkParam) -> Result { + let config = MongodbConfig::from_btreemap(param.properties.clone())?; + let pk_indices = param.downstream_pk.clone(); + let is_append_only = param.sink_type.is_append_only(); + let schema = param.schema(); + Ok(Self { + config, + param, + schema, + pk_indices, + is_append_only, + }) + } +} + +impl TryFrom for MongodbSink { + type Error = SinkError; + + fn try_from(param: SinkParam) -> std::result::Result { + MongodbSink::new(param) + } +} + +impl Sink for MongodbSink { + type Coordinator = DummySinkCommitCoordinator; + type LogSinker = LogSinkerOf; + + const SINK_NAME: &'static str = MONGODB_SINK; + + fn is_sink_decouple(_desc: &SinkDesc, user_specified: &SinkDecouple) -> Result { + match user_specified { + // Set default sink decouple to false, because mongodb sink writer only ensure delivery on checkpoint barrier + SinkDecouple::Default | SinkDecouple::Disable => Ok(false), + SinkDecouple::Enable => Ok(true), + } + } + + async fn validate(&self) -> Result<()> { + if !self.is_append_only { + if self.pk_indices.is_empty() { + return Err(SinkError::Config(anyhow!( + "Primary key not defined for upsert mongodb sink (please define in `primary_key` field)"))); + } + + // checking if there is a non-pk field's name is `_id` + if self + .schema + .fields + .iter() + .enumerate() + .any(|(i, field)| !self.pk_indices.contains(&i) && field.name == MONGODB_PK_NAME) + { + return Err(SinkError::Config(anyhow!( + "_id field must be the sink's primary key, but a non primary key field name is _id", + ))); + } + + // assume the sink's pk is (a, b) and then the data written to mongodb will be + // { "_id": {"a": 1, "b": 2}, "a": 1, "b": 2, ... 
} + // you can see that the compound pk (a, b) is turned into an Object {"a": 1, "b": 2} + // and the each pk field is become as a field of the document + // but if the sink's pk is (_id, b) and the data will be: + // { "_id": {"_id": 1, "b": 2}, "b": 2, ... } + // in this case, the original _id field of the compound pk has been overridden + // we should consider this is a schema error + if self.pk_indices.len() > 1 + && self + .pk_indices + .iter() + .map(|&idx| self.schema.fields[idx].name.as_str()) + .any(|field| field == MONGODB_PK_NAME) + { + return Err(SinkError::Config(anyhow!( + "primary key fields must not contain a field named _id" + ))); + } + } + + if self.config.bulk_write_max_entries > MONGODB_BULK_WRITE_SIZE_LIMIT { + return Err(SinkError::Config(anyhow!( + "mongodb.bulk_write.max_entries {} exceeds the limit {}", + self.config.bulk_write_max_entries, + MONGODB_BULK_WRITE_SIZE_LIMIT + ))); + } + + if let Err(err) = self.config.common.collection_name.parse::() { + return Err(SinkError::Config(anyhow!(err).context(format!( + "invalid collection.name {}", + self.config.common.collection_name + )))); + } + + // checking reachability + let client = self.config.common.build_client().await?; + let client = ClientGuard::new(self.param.sink_name.clone(), client); + client + .database("admin") + .run_command(doc! {"hello":1}, None) + .await + .map_err(|err| { + SinkError::Mongodb(anyhow!(err).context("failed to send hello command to mongodb")) + })?; + + if self.config.drop_collection_name_field && self.config.collection_name_field.is_none() { + return Err(SinkError::Config(anyhow!( + "collection.name.field must be specified when collection.name.field.drop is enabled" + ))); + } + + // checking dynamic collection name settings + if let Some(coll_field) = &self.config.collection_name_field { + let fields = self.schema.fields(); + + let coll_field_index = fields + .iter() + .enumerate() + .find_map(|(index, field)| { + if &field.name == coll_field { + Some(index) + } else { + None + } + }) + .ok_or(SinkError::Config(anyhow!( + "collection.name.field {} not found", + coll_field + )))?; + + if fields[coll_field_index].data_type() != risingwave_common::types::DataType::Varchar { + return Err(SinkError::Config(anyhow!( + "the type of collection.name.field {} must be varchar", + coll_field + ))); + } + + if !self.is_append_only && self.pk_indices.iter().any(|idx| *idx == coll_field_index) { + return Err(SinkError::Config(anyhow!( + "collection.name.field {} must not be equal to the primary key field", + coll_field + ))); + } + } + + Ok(()) + } + + async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result { + Ok(MongodbSinkWriter::new( + format!("{}-{}", writer_param.executor_id, self.param.sink_name), + self.config.clone(), + self.schema.clone(), + self.pk_indices.clone(), + self.is_append_only, + ) + .await? 
+ .into_log_sinker(writer_param.sink_metrics)) + } +} + +pub struct MongodbSinkWriter { + pub config: MongodbConfig, + payload_writer: MongodbPayloadWriter, + is_append_only: bool, + // TODO switching to bulk write API when mongodb driver supports it + command_builder: CommandBuilder, +} + +impl MongodbSinkWriter { + pub async fn new( + name: String, + config: MongodbConfig, + schema: Schema, + pk_indices: Vec, + is_append_only: bool, + ) -> Result { + let client = config.common.build_client().await?; + + let default_namespace = + config + .common + .collection_name + .parse() + .map_err(|err: mongodb::error::Error| { + SinkError::Mongodb(anyhow!(err).context("parsing default namespace failed")) + })?; + + let coll_name_field_index = + config + .collection_name_field + .as_ref() + .and_then(|coll_name_field| { + schema + .names_str() + .iter() + .position(|&name| coll_name_field == name) + }); + + let col_indices = if let Some(coll_name_field_index) = coll_name_field_index + && config.drop_collection_name_field + { + (0..schema.fields.len()) + .filter(|idx| *idx != coll_name_field_index) + .collect_vec() + } else { + (0..schema.fields.len()).collect_vec() + }; + + let row_encoder = BsonEncoder::new(schema.clone(), Some(col_indices), pk_indices.clone()); + + let command_builder = if is_append_only { + CommandBuilder::AppendOnly(HashMap::new()) + } else { + CommandBuilder::Upsert(HashMap::new()) + }; + + let payload_writer = MongodbPayloadWriter::new( + schema, + pk_indices, + default_namespace, + coll_name_field_index, + ClientGuard::new(name, client), + config.bulk_write_max_entries, + row_encoder, + ); + + Ok(Self { + config, + payload_writer, + is_append_only, + command_builder, + }) + } + + async fn append(&mut self, chunk: StreamChunk) -> Result<()> { + let insert_builder = + must_match!(&mut self.command_builder, CommandBuilder::AppendOnly(builder) => builder); + for (op, row) in chunk.rows() { + if op != Op::Insert { + if let Ok(suppressed_count) = LOG_SUPPERSSER.check() { + tracing::warn!( + suppressed_count, + ?op, + ?row, + "non-insert op received in append-only mode" + ); + } + continue; + } + self.payload_writer.append(insert_builder, row).await?; + } + Ok(()) + } + + async fn upsert(&mut self, chunk: StreamChunk) -> Result<()> { + let upsert_builder = + must_match!(&mut self.command_builder, CommandBuilder::Upsert(builder) => builder); + for (op, row) in chunk.rows() { + if op == Op::UpdateDelete { + // we should ignore the `UpdateDelete` in upsert mode + continue; + } + self.payload_writer.upsert(upsert_builder, op, row).await?; + } + Ok(()) + } +} + +#[async_trait] +impl SinkWriter for MongodbSinkWriter { + async fn begin_epoch(&mut self, _epoch: u64) -> Result<()> { + Ok(()) + } + + async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { + if self.is_append_only { + self.append(chunk).await + } else { + self.upsert(chunk).await + } + } + + async fn barrier(&mut self, is_checkpoint: bool) -> Result { + if is_checkpoint { + if self.is_append_only { + let insert_builder = must_match!(&mut self.command_builder, CommandBuilder::AppendOnly(builder) => builder); + self.payload_writer.flush_insert(insert_builder).await?; + } else { + let upsert_builder = must_match!(&mut self.command_builder, CommandBuilder::Upsert(builder) => builder); + self.payload_writer.flush_upsert(upsert_builder).await?; + } + } + Ok(()) + } +} + +struct InsertCommandBuilder { + coll: String, + inserts: Array, +} + +impl InsertCommandBuilder { + fn new(coll: String, capacity: usize) -> Self { + 
Self { + coll, + inserts: Array::with_capacity(capacity), + } + } + + fn append(&mut self, row: Document) { + self.inserts.push(Bson::Document(row)); + } + + fn build(self) -> Document { + doc! { + "insert": self.coll, + "ordered": true, + "documents": self.inserts, + } + } +} + +struct UpsertCommandBuilder { + coll: String, + updates: Array, + deletes: HashMap, Document>, +} + +impl UpsertCommandBuilder { + fn new(coll: String, capacity: usize) -> Self { + Self { + coll, + updates: Array::with_capacity(capacity), + deletes: HashMap::with_capacity(capacity), + } + } + + fn add_upsert(&mut self, pk: Document, row: Document) -> Result<()> { + let pk_data = mongodb::bson::to_vec(&pk).map_err(|err| { + SinkError::Mongodb(anyhow!(err).context("cannot serialize primary key")) + })?; + // under same pk, if the record currently being upserted was marked for deletion previously, we should + // revert the deletion, otherwise, the upserting record may be accidentally deleted. + // see https://github.com/risingwavelabs/risingwave/pull/17102#discussion_r1630684160 for more information. + self.deletes.remove(&pk_data); + + self.updates.push(bson!( { + "q": pk, + "u": row, + "upsert": true, + "multi": false, + })); + + Ok(()) + } + + fn add_delete(&mut self, pk: Document) -> Result<()> { + let pk_data = mongodb::bson::to_vec(&pk).map_err(|err| { + SinkError::Mongodb(anyhow!(err).context("cannot serialize primary key")) + })?; + self.deletes.insert(pk_data, pk); + Ok(()) + } + + fn build(self) -> (Option, Option) { + let (mut upsert_document, mut delete_document) = (None, None); + if !self.updates.is_empty() { + upsert_document = Some(doc! { + "update": self.coll.clone(), + "ordered": true, + "updates": self.updates, + }); + } + if !self.deletes.is_empty() { + let deletes = self + .deletes + .into_values() + .map(|pk| { + bson!({ + "q": pk, + "limit": 1, + }) + }) + .collect::(); + + delete_document = Some(doc! { + "delete": self.coll, + "ordered": true, + "deletes": deletes, + }); + } + (upsert_document, delete_document) + } +} + +enum CommandBuilder { + AppendOnly(HashMap), + Upsert(HashMap), +} + +type MongodbNamespace = (String, String); + +// In the future, we may build the payload into RawBSON to gain a better performance. +// The current API (mongodb-2.8.2) lacks the support of writing RawBSON. 
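A standalone sketch of the invariant `UpsertCommandBuilder::add_upsert` maintains above: within one buffered batch, an upsert for a primary key removes any delete already queued for that key, so the freshly written row cannot be clobbered by a stale delete. Types below are simplified stand-ins, not the sink's real builders:

```rust
use std::collections::HashMap;

#[derive(Default)]
struct Batch {
    upserts: Vec<String>,         // rows to upsert
    deletes: HashMap<String, ()>, // pending deletes, keyed by serialized pk
}

impl Batch {
    fn add_upsert(&mut self, pk: &str, row: &str) {
        // Revert a pending delete for the same key before queueing the upsert.
        self.deletes.remove(pk);
        self.upserts.push(format!("{pk} => {row}"));
    }

    fn add_delete(&mut self, pk: &str) {
        self.deletes.insert(pk.to_string(), ());
    }
}

fn main() {
    let mut batch = Batch::default();
    batch.add_delete("id=1");
    batch.add_upsert("id=1", "v=42");
    // The delete queued before the upsert has been dropped.
    assert!(batch.deletes.is_empty());
    assert_eq!(batch.upserts.len(), 1);
}
```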
+struct MongodbPayloadWriter { + schema: Schema, + pk_indices: Vec, + default_namespace: Namespace, + coll_name_field_index: Option, + client: ClientGuard, + buffered_entries: usize, + max_entries: usize, + row_encoder: BsonEncoder, +} + +impl MongodbPayloadWriter { + fn new( + schema: Schema, + pk_indices: Vec, + default_namespace: Namespace, + coll_name_field_index: Option, + client: ClientGuard, + max_entries: usize, + row_encoder: BsonEncoder, + ) -> Self { + Self { + schema, + pk_indices, + default_namespace, + coll_name_field_index, + client, + buffered_entries: 0, + max_entries, + row_encoder, + } + } + + fn extract_namespace_from_row_ref(&self, row: RowRef<'_>) -> MongodbNamespace { + let ns = self.coll_name_field_index.and_then(|coll_name_field_index| { + match row.datum_at(coll_name_field_index) { + Some(ScalarRefImpl::Utf8(v)) => match v.parse::() { + Ok(ns) => Some(ns), + Err(err) => { + if let Ok(suppressed_count) = LOG_SUPPERSSER.check() { + tracing::warn!( + suppressed_count, + error = %err.as_report(), + collection_name = %v, + "parsing collection name failed, fallback to use default collection.name" + ); + } + None + } + }, + _ => { + if let Ok(suppressed_count) = LOG_SUPPERSSER.check() { + tracing::warn!( + suppressed_count, + "the value of collection.name.field is null, fallback to use default collection.name" + ); + } + None + } + } + }); + match ns { + Some(ns) => (ns.db, ns.coll), + None => ( + self.default_namespace.db.clone(), + self.default_namespace.coll.clone(), + ), + } + } + + async fn append( + &mut self, + insert_builder: &mut HashMap, + row: RowRef<'_>, + ) -> Result<()> { + let document = self.row_encoder.encode(row)?; + let ns = self.extract_namespace_from_row_ref(row); + let coll = ns.1.clone(); + + insert_builder + .entry(ns) + .or_insert_with(|| InsertCommandBuilder::new(coll, self.max_entries)) + .append(document); + + self.buffered_entries += 1; + if self.buffered_entries >= self.max_entries { + self.flush_insert(insert_builder).await?; + } + Ok(()) + } + + async fn upsert( + &mut self, + upsert_builder: &mut HashMap, + op: Op, + row: RowRef<'_>, + ) -> Result<()> { + let mut document = self.row_encoder.encode(row)?; + let ns = self.extract_namespace_from_row_ref(row); + let coll = ns.1.clone(); + + let pk = self.row_encoder.construct_pk(row); + + // Specify the primary key (_id) for the MongoDB collection if the user does not provide one. + if self.pk_indices.len() > 1 + || self.schema.fields[self.pk_indices[0]].name != MONGODB_PK_NAME + { + // compound pk should not have a field named `_id` + document.insert(MONGODB_PK_NAME, pk.clone()); + } + + let pk = doc! {MONGODB_PK_NAME: pk}; + match op { + Op::Insert | Op::UpdateInsert => upsert_builder + .entry(ns) + .or_insert_with(|| UpsertCommandBuilder::new(coll, self.max_entries)) + .add_upsert(pk, document)?, + Op::UpdateDelete => (), + Op::Delete => upsert_builder + .entry(ns) + .or_insert_with(|| UpsertCommandBuilder::new(coll, self.max_entries)) + .add_delete(pk)?, + } + + self.buffered_entries += 1; + if self.buffered_entries >= self.max_entries { + self.flush_upsert(upsert_builder).await?; + } + Ok(()) + } + + async fn flush_insert( + &mut self, + insert_builder: &mut HashMap, + ) -> Result<()> { + // TODO try sending bulk-write of each collection concurrently to improve the performance when + // `dynamic collection` is enabled. We may need to provide best practice to guide user on setting + // the MongoDB driver's connection properties. 
+ for (ns, builder) in insert_builder.drain() { + self.send_bulk_write_command(&ns.0, builder.build()).await?; + } + self.buffered_entries = 0; + Ok(()) + } + + async fn flush_upsert( + &mut self, + upsert_builder: &mut HashMap, + ) -> Result<()> { + // TODO try sending bulk-write of each collection concurrently to improve the performance when + // `dynamic collection` is enabled. We may need to provide best practice to guide user on setting + // the MongoDB driver's connection properties. + for (ns, builder) in upsert_builder.drain() { + let (upsert, delete) = builder.build(); + // we are sending the bulk upsert first because, under same pk, the `Insert` and `UpdateInsert` + // should always appear before `Delete`. we have already ignored the `UpdateDelete` + // which is useless in upsert mode. + if let Some(upsert) = upsert { + self.send_bulk_write_command(&ns.0, upsert).await?; + } + if let Some(delete) = delete { + self.send_bulk_write_command(&ns.0, delete).await?; + } + } + self.buffered_entries = 0; + Ok(()) + } + + async fn send_bulk_write_command(&self, database: &str, command: Document) -> Result<()> { + let db = self.client.database(database); + + let result = db.run_command(command, None).await.map_err(|err| { + SinkError::Mongodb(anyhow!(err).context(format!( + "sending bulk write command failed, database: {}", + database + ))) + })?; + + if let Ok(write_errors) = result.get_array("writeErrors") { + return Err(SinkError::Mongodb(anyhow!( + "bulk write respond with write errors: {:?}", + write_errors, + ))); + } + + let n = result.get_i32("n").map_err(|err| { + SinkError::Mongodb( + anyhow!(err).context("can't extract field n from bulk write response"), + ) + })?; + if n < 1 { + return Err(SinkError::Mongodb(anyhow!( + "bulk write respond with an abnormal state, n = {}", + n + ))); + } + + Ok(()) + } +} diff --git a/src/connector/src/sink/remote.rs b/src/connector/src/sink/remote.rs index f8b84fc64eb86..863ca02f69d59 100644 --- a/src/connector/src/sink/remote.rs +++ b/src/connector/src/sink/remote.rs @@ -58,7 +58,7 @@ use tokio::task::spawn_blocking; use tokio_stream::wrappers::ReceiverStream; use tracing::warn; -use super::elasticsearch::{StreamChunkConverter, ES_OPTION_DELIMITER}; +use super::elasticsearch::{is_es_sink, StreamChunkConverter, ES_OPTION_DELIMITER}; use crate::error::ConnectorResult; use crate::sink::catalog::desc::SinkDesc; use crate::sink::coordinate::CoordinatedSinkWriter; @@ -73,6 +73,7 @@ macro_rules! def_remote_sink { () => { def_remote_sink! 
{ { ElasticSearch, ElasticSearchSink, "elasticsearch" } + { Opensearch, OpensearchSink, "opensearch"} { Cassandra, CassandraSink, "cassandra" } { Jdbc, JdbcSink, "jdbc", |desc| { desc.sink_type.is_append_only() @@ -164,7 +165,7 @@ impl Sink for RemoteSink { } async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> ConnectorResult<()> { - if sink_name == ElasticSearchSink::SINK_NAME + if is_es_sink(sink_name) && param.downstream_pk.len() > 1 && !param.properties.contains_key(ES_OPTION_DELIMITER) { @@ -189,7 +190,7 @@ async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> ConnectorRe | DataType::Jsonb | DataType::Bytea => Ok(()), DataType::List(list) => { - if (sink_name==ElasticSearchSink::SINK_NAME) | matches!(list.as_ref(), DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Float32 | DataType::Float64 | DataType::Varchar){ + if is_es_sink(sink_name) || matches!(list.as_ref(), DataType::Int16 | DataType::Int32 | DataType::Int64 | DataType::Float32 | DataType::Float64 | DataType::Varchar){ Ok(()) } else{ Err(SinkError::Remote(anyhow!( @@ -200,7 +201,7 @@ async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> ConnectorRe } }, DataType::Struct(_) => { - if sink_name==ElasticSearchSink::SINK_NAME{ + if is_es_sink(sink_name){ Ok(()) }else{ Err(SinkError::Remote(anyhow!( @@ -263,7 +264,7 @@ impl RemoteLogSinker { sink_name: &str, ) -> Result { let sink_proto = sink_param.to_proto(); - let payload_schema = if sink_name == ElasticSearchSink::SINK_NAME { + let payload_schema = if is_es_sink(sink_name) { let columns = vec![ ColumnDesc::unnamed(ColumnId::from(0), DataType::Varchar).to_protobuf(), ColumnDesc::unnamed(ColumnId::from(1), DataType::Varchar).to_protobuf(), diff --git a/src/connector/src/sink/snowflake_connector.rs b/src/connector/src/sink/snowflake_connector.rs index 3adaa43bb5aa6..bfd2458900294 100644 --- a/src/connector/src/sink/snowflake_connector.rs +++ b/src/connector/src/sink/snowflake_connector.rs @@ -18,6 +18,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; use anyhow::{anyhow, Context}; use jsonwebtoken::{encode, Algorithm, EncodingKey, Header}; +use object_metrics::GLOBAL_OBJECT_STORE_METRICS; use reqwest::{header, Client, RequestBuilder, StatusCode}; use risingwave_object_store::object::*; use serde::{Deserialize, Serialize}; @@ -197,11 +198,13 @@ impl SnowflakeS3Client { // FIXME: we should use the `ObjectStoreConfig` instead of default // just use default configuration here for opendal s3 engine let config = ObjectStoreConfig::default(); + let metrics = Arc::new(GLOBAL_OBJECT_STORE_METRICS.clone()); // create the s3 engine for streaming upload to the intermediate s3 bucket let opendal_s3_engine = OpendalObjectStore::new_s3_engine_with_credentials( &s3_bucket, Arc::new(config), + metrics, &aws_access_key_id, &aws_secret_access_key, &aws_region, diff --git a/src/connector/src/sink/starrocks.rs b/src/connector/src/sink/starrocks.rs index bace71cd59e2c..64dadf0b89866 100644 --- a/src/connector/src/sink/starrocks.rs +++ b/src/connector/src/sink/starrocks.rs @@ -168,7 +168,7 @@ impl StarrocksSink { ) -> Result<()> { let rw_fields_name = self.schema.fields(); if rw_fields_name.len() > starrocks_columns_desc.len() { - return Err(SinkError::Starrocks("The length of the RisingWave column must be equal or less to the length of the starrocks column".to_string())); + return Err(SinkError::Starrocks("The columns of the sink must be equal to or a superset of the target table's columns.".to_string())); } for i in rw_fields_name { diff --git 
a/src/connector/src/source/cdc/external/mod.rs b/src/connector/src/source/cdc/external/mod.rs index 65bd4adb48132..528e14bd60229 100644 --- a/src/connector/src/source/cdc/external/mod.rs +++ b/src/connector/src/source/cdc/external/mod.rs @@ -225,8 +225,11 @@ impl ExternalTableConfig { #[derive(Debug, Clone, Deserialize)] #[serde(rename_all = "lowercase")] pub enum SslMode { + #[serde(alias = "disable")] Disabled, + #[serde(alias = "prefer")] Preferred, + #[serde(alias = "require")] Required, } diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index 731bb900335ee..7a9aaa444400b 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -21,6 +21,10 @@ BigQueryConfig: field_type: usize required: false default: '1024' + - name: bigquery.retry_times + field_type: usize + required: false + default: '5' - name: aws.region field_type: String required: false @@ -84,6 +88,9 @@ ClickHouseConfig: - name: clickhouse.table field_type: String required: true + - name: clickhouse.delete.column + field_type: String + required: false - name: r#type field_type: String required: true @@ -132,6 +139,9 @@ DorisConfig: - name: doris.table field_type: String required: true + - name: doris.partial_update + field_type: String + required: false - name: r#type field_type: String required: true @@ -511,6 +521,33 @@ KinesisSinkConfig: required: false alias: - kinesis.assumerole.external_id +MongodbConfig: + fields: + - name: mongodb.url + field_type: String + comments: The URL of MongoDB + required: true + - name: collection.name + field_type: String + comments: The collection name where data should be written to or read from. For sinks, the format is `db_name.collection_name`. Data can also be written to dynamic collections, see `collection.name.field` for more information. + required: true + - name: r#type + field_type: String + required: true + - name: collection.name.field + field_type: String + comments: The dynamic collection name where data should be sunk to. If specified, the field value will be used as the collection name. The collection name format is same as `collection.name`. If the field value is null or an empty string, then the `collection.name` will be used as a fallback destination. + required: false + - name: collection.name.field.drop + field_type: bool + comments: Controls whether the field value of `collection.name.field` should be dropped when sinking. Set this option to true to avoid the duplicate values of `collection.name.field` being written to the result collection. + required: false + default: Default::default + - name: mongodb.bulk_write.max_entries + field_type: usize + comments: The maximum entries will accumulate before performing the bulk write, defaults to 1024. + required: false + default: '1024' MqttConfig: fields: - name: url diff --git a/src/connector/with_options_source.yaml b/src/connector/with_options_source.yaml index 822ab25ea3ef7..2a176f760f96c 100644 --- a/src/connector/with_options_source.yaml +++ b/src/connector/with_options_source.yaml @@ -314,6 +314,16 @@ KinesisProperties: required: false alias: - kinesis.assumerole.external_id +MongodbCommon: + fields: + - name: mongodb.url + field_type: String + comments: The URL of MongoDB + required: true + - name: collection.name + field_type: String + comments: The collection name where data should be written to or read from. For sinks, the format is `db_name.collection_name`. 
Data can also be written to dynamic collections, see `collection.name.field` for more information. + required: true MqttProperties: fields: - name: url diff --git a/src/ctl/src/cmd_impl/hummock/sst_dump.rs b/src/ctl/src/cmd_impl/hummock/sst_dump.rs index ce69ab87f1b7c..3a71fbd007214 100644 --- a/src/ctl/src/cmd_impl/hummock/sst_dump.rs +++ b/src/ctl/src/cmd_impl/hummock/sst_dump.rs @@ -15,7 +15,6 @@ use std::collections::HashMap; use std::sync::Arc; -use anyhow::anyhow; use bytes::{Buf, Bytes}; use chrono::offset::Utc; use chrono::DateTime; @@ -123,11 +122,7 @@ pub async fn sst_dump(context: &CtlContext, args: SstDumpArgs) -> anyhow::Result if let Some(obj_id) = &args.object_id { let obj_store = sstable_store.store(); let obj_path = sstable_store.get_sst_data_path(*obj_id); - let mut obj_metadata_iter = obj_store.list(&obj_path).await?; - let obj = obj_metadata_iter - .try_next() - .await? - .ok_or_else(|| anyhow!(format!("object {obj_path} doesn't exist")))?; + let obj = obj_store.metadata(&obj_path).await?; print_object(&obj); let meta_offset = get_meta_offset_from_object(&obj, obj_store.as_ref()).await?; let obj_id = SstableStore::get_object_id_from_path(&obj.key); diff --git a/src/frontend/src/binder/relation/table_or_source.rs b/src/frontend/src/binder/relation/table_or_source.rs index c5283a2cc592a..94b2980fcb63d 100644 --- a/src/frontend/src/binder/relation/table_or_source.rs +++ b/src/frontend/src/binder/relation/table_or_source.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use either::Either; use itertools::Itertools; use risingwave_common::bail_not_implemented; -use risingwave_common::catalog::{is_system_schema, Field}; +use risingwave_common::catalog::{debug_assert_column_ids_distinct, is_system_schema, Field}; use risingwave_common::session_config::USER_NAME_WILD_CARD; use risingwave_connector::WithPropertiesExt; use risingwave_sqlparser::ast::{AsOf, Statement, TableAlias}; @@ -221,6 +221,7 @@ impl Binder { source_catalog: &SourceCatalog, as_of: Option, ) -> (Relation, Vec<(bool, Field)>) { + debug_assert_column_ids_distinct(&source_catalog.columns); self.included_relations.insert(source_catalog.id.into()); ( Relation::Source(Box::new(BoundSource { diff --git a/src/frontend/src/catalog/table_catalog.rs b/src/frontend/src/catalog/table_catalog.rs index 71394f46dcf51..a0a35ffebb3f4 100644 --- a/src/frontend/src/catalog/table_catalog.rs +++ b/src/frontend/src/catalog/table_catalog.rs @@ -210,7 +210,7 @@ impl TableType { } } -/// The version of a table, used by schema change. See [`PbTableVersion`]. +/// The version of a table, used by schema change. See [`PbTableVersion`] for more details. 
#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct TableVersion { pub version_id: TableVersionId, diff --git a/src/frontend/src/handler/alter_source_column.rs b/src/frontend/src/handler/alter_source_column.rs index fcabedc1149c4..43985518fd1b3 100644 --- a/src/frontend/src/handler/alter_source_column.rs +++ b/src/frontend/src/handler/alter_source_column.rs @@ -14,7 +14,7 @@ use itertools::Itertools; use pgwire::pg_response::{PgResponse, StatementType}; -use risingwave_common::catalog::ColumnId; +use risingwave_common::catalog::max_column_id; use risingwave_connector::source::{extract_source_struct, SourceEncode, SourceStruct}; use risingwave_sqlparser::ast::{ AlterSourceOperation, ColumnDef, CreateSourceStatement, ObjectName, Statement, @@ -106,10 +106,7 @@ pub async fn handle_alter_source_column( catalog.definition = alter_definition_add_column(&catalog.definition, column_def.clone())?; let mut bound_column = bind_sql_columns(&[column_def])?.remove(0); - bound_column.column_desc.column_id = columns - .iter() - .fold(ColumnId::new(i32::MIN), |a, b| a.max(b.column_id())) - .next(); + bound_column.column_desc.column_id = max_column_id(columns).next(); columns.push(bound_column); } _ => unreachable!(), diff --git a/src/frontend/src/handler/alter_source_with_sr.rs b/src/frontend/src/handler/alter_source_with_sr.rs index c72cf547365d7..070b26b6a25e2 100644 --- a/src/frontend/src/handler/alter_source_with_sr.rs +++ b/src/frontend/src/handler/alter_source_with_sr.rs @@ -18,7 +18,7 @@ use anyhow::Context; use itertools::Itertools; use pgwire::pg_response::StatementType; use risingwave_common::bail_not_implemented; -use risingwave_common::catalog::ColumnCatalog; +use risingwave_common::catalog::{max_column_id, ColumnCatalog}; use risingwave_connector::WithPropertiesExt; use risingwave_pb::catalog::StreamSourceInfo; use risingwave_pb::plan_common::{EncodeType, FormatType}; @@ -68,14 +68,19 @@ fn encode_type_to_encode(from: EncodeType) -> Option { }) } -/// Returns the columns in `columns_a` but not in `columns_b`, -/// where the comparison is done by name and data type, -/// and hidden columns are ignored. +/// Returns the columns in `columns_a` but not in `columns_b`. +/// +/// Note: +/// - The comparison is done by name and data type, without checking `ColumnId`. +/// - Hidden columns and `INCLUDE ... AS ...` columns are ignored. Because it's only for the special handling of alter sr. +/// For the newly resolved `columns_from_resolve_source` (created by [`bind_columns_from_source`]), it doesn't contain hidden columns (`_row_id`) and `INCLUDE ... AS ...` columns. +/// This is fragile and we should really refactor it later. fn columns_minus(columns_a: &[ColumnCatalog], columns_b: &[ColumnCatalog]) -> Vec { columns_a .iter() .filter(|col_a| { !col_a.is_hidden() + && !col_a.is_connector_additional_column() && !columns_b.iter().any(|col_b| { col_a.name() == col_b.name() && col_a.data_type() == col_b.data_type() }) @@ -162,8 +167,20 @@ pub async fn refresh_sr_and_get_columns_diff( unreachable!("source without schema registry is rejected") }; - let added_columns = columns_minus(&columns_from_resolve_source, &original_source.columns); + let mut added_columns = columns_minus(&columns_from_resolve_source, &original_source.columns); + // The newly resolved columns' column IDs also starts from 1. They cannot be used directly. 
+ let mut next_col_id = max_column_id(&original_source.columns).next(); + for col in &mut added_columns { + col.column_desc.column_id = next_col_id; + next_col_id = next_col_id.next(); + } let dropped_columns = columns_minus(&original_source.columns, &columns_from_resolve_source); + tracing::debug!( + ?added_columns, + ?dropped_columns, + ?columns_from_resolve_source, + original_source = ?original_source.columns + ); Ok((source_info, added_columns, dropped_columns)) } diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index a29aa86907e0f..1f458bbbc09b5 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -24,7 +24,7 @@ use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::array::arrow::{FromArrow, IcebergArrowConvert}; use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ - is_column_ids_dedup, ColumnCatalog, ColumnDesc, ColumnId, Schema, TableId, + debug_assert_column_ids_distinct, ColumnCatalog, ColumnDesc, ColumnId, Schema, TableId, INITIAL_SOURCE_VERSION_ID, KAFKA_TIMESTAMP_COLUMN_NAME, }; use risingwave_common::types::DataType; @@ -73,8 +73,8 @@ use crate::error::ErrorCode::{self, Deprecated, InvalidInputSyntax, NotSupported use crate::error::{Result, RwError}; use crate::expr::Expr; use crate::handler::create_table::{ - bind_pk_on_relation, bind_sql_column_constraints, bind_sql_columns, bind_sql_pk_names, - ensure_table_constraints_supported, ColumnIdGenerator, + bind_pk_and_row_id_on_relation, bind_sql_column_constraints, bind_sql_columns, + bind_sql_pk_names, ensure_table_constraints_supported, ColumnIdGenerator, }; use crate::handler::util::SourceSchemaCompatExt; use crate::handler::HandlerArgs; @@ -289,8 +289,11 @@ fn get_name_strategy_or_default(name_strategy: Option) -> Result for more information. -/// return `(columns, source info)` +/// Resolves the schema of the source from external schema file. +/// See for more information. +/// +/// Note: the returned schema strictly corresponds to the schema. +/// Other special columns like additional columns (`INCLUDE`), and `row_id` column are not included. pub(crate) async fn bind_columns_from_source( session: &SessionImpl, source_schema: &ConnectorSchema, @@ -489,6 +492,28 @@ pub(crate) async fn bind_columns_from_source( } }; + if cfg!(debug_assertions) { + // validate column ids + // Note: this just documents how it works currently. It doesn't mean whether it's reasonable. + if let Some(ref columns) = columns { + let mut i = 1; + fn check_col(col: &ColumnDesc, i: &mut usize, columns: &Vec) { + for nested_col in &col.field_descs { + // What's the usage of struct fields' column IDs? + check_col(nested_col, i, columns); + } + assert!( + col.column_id.get_id() == *i as i32, + "unexpected column id\ncol: {col:?}\ni: {i}\ncolumns: {columns:#?}" + ); + *i += 1; + } + for col in columns { + check_col(&col.column_desc, &mut i, columns); + } + } + } + if !format_encode_options_to_consume.is_empty() { let err_string = format!( "Get unknown format_encode_options for {:?} {:?}: {}", @@ -1387,10 +1412,12 @@ pub async fn bind_create_source( .into()); } + // XXX: why do we use col_id_gen here? It doesn't seem to be very necessary. + // XXX: should we also chenge the col id for struct fields? 
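The ID-reassignment loop above exists because the freshly resolved schema-registry columns are numbered from 1 again and would collide with the existing catalog. A tiny illustration of the intended outcome, with plain integers standing in for `ColumnId` and a local helper rather than the real catalog API:

```rust
// Existing source columns keep their IDs; newly added columns continue after the max.
fn reassign(existing: &[i32], added: &mut [i32]) {
    let mut next = existing.iter().copied().max().unwrap_or(0) + 1;
    for id in added {
        *id = next;
        next += 1;
    }
}

fn main() {
    let existing = [1, 2, 5];   // IDs can be sparse, e.g. after a column was dropped
    let mut added = [1, 2];     // newly resolved columns start from 1 again
    reassign(&existing, &mut added);
    assert_eq!(added, [6, 7]);  // they must not collide with any existing ID
}
```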
for c in &mut columns { c.column_desc.column_id = col_id_gen.generate(c.name()) } - debug_assert!(is_column_ids_dedup(&columns)); + debug_assert_column_ids_distinct(&columns); let must_need_pk = if is_create_source { with_properties.connector_need_pk() @@ -1403,7 +1430,7 @@ pub async fn bind_create_source( }; let (mut columns, pk_col_ids, row_id_index) = - bind_pk_on_relation(columns, pk_names, must_need_pk)?; + bind_pk_and_row_id_on_relation(columns, pk_names, must_need_pk)?; let watermark_descs = bind_source_watermark(session, source_name.clone(), source_watermarks, &columns)?; diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 0f3693653ced5..c542762702053 100644 --- a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ -24,7 +24,7 @@ use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ CdcTableDesc, ColumnCatalog, ColumnDesc, TableId, TableVersionId, DEFAULT_SCHEMA_NAME, - INITIAL_TABLE_VERSION_ID, USER_COLUMN_ID_OFFSET, + INITIAL_TABLE_VERSION_ID, }; use risingwave_common::util::sort_util::{ColumnOrder, OrderType}; use risingwave_common::util::value_encoding::DatumToProtoExt; @@ -110,7 +110,7 @@ impl ColumnIdGenerator { pub fn new_initial() -> Self { Self { existing: HashMap::new(), - next_column_id: ColumnId::from(USER_COLUMN_ID_OFFSET), + next_column_id: ColumnId::first_user_column(), version_id: INITIAL_TABLE_VERSION_ID, } } @@ -404,7 +404,7 @@ fn multiple_pk_definition_err() -> RwError { /// /// It returns the columns together with `pk_column_ids`, and an optional row id column index if /// added. -pub fn bind_pk_on_relation( +pub fn bind_pk_and_row_id_on_relation( mut columns: Vec, pk_names: Vec, must_need_pk: bool, @@ -570,7 +570,8 @@ pub(crate) fn gen_create_table_plan_without_source( ) -> Result<(PlanRef, PbTable)> { ensure_table_constraints_supported(&constraints)?; let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; + let (mut columns, pk_column_ids, row_id_index) = + bind_pk_and_row_id_on_relation(columns, pk_names, true)?; let watermark_descs: Vec = bind_source_watermark( context.session_ctx(), @@ -762,7 +763,8 @@ pub(crate) fn gen_create_table_plan_for_cdc_table( c.column_desc.column_id = col_id_gen.generate(c.name()) } - let (columns, pk_column_ids, _row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; + let (columns, pk_column_ids, _row_id_index) = + bind_pk_and_row_id_on_relation(columns, pk_names, true)?; let definition = context.normalized_sql().to_owned(); @@ -881,7 +883,6 @@ fn derive_connect_properties( pub(super) async fn handle_create_table_plan( handler_args: HandlerArgs, explain_options: ExplainOptions, - col_id_gen: ColumnIdGenerator, source_schema: Option, cdc_table_info: Option, table_name: ObjectName, @@ -894,6 +895,7 @@ pub(super) async fn handle_create_table_plan( with_version_column: Option, include_column_options: IncludeOption, ) -> Result<(PlanRef, Option, PbTable, TableJobType)> { + let col_id_gen = ColumnIdGenerator::new_initial(); let source_schema = check_create_table_with_source( &handler_args.with_options, source_schema, @@ -1148,11 +1150,9 @@ pub async fn handle_create_table( } let (graph, source, table, job_type) = { - let col_id_gen = ColumnIdGenerator::new_initial(); let (plan, source, table, job_type) = handle_create_table_plan( handler_args, 
ExplainOptions::default(), - col_id_gen, source_schema, cdc_table_info, table_name.clone(), @@ -1435,7 +1435,8 @@ mod tests { } ensure_table_constraints_supported(&constraints)?; let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (_, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names, true)?; + let (_, pk_column_ids, _) = + bind_pk_and_row_id_on_relation(columns, pk_names, true)?; Ok(pk_column_ids) })(); match (expected, actual) { diff --git a/src/frontend/src/handler/explain.rs b/src/frontend/src/handler/explain.rs index 9f46087c206e8..db124b373181b 100644 --- a/src/frontend/src/handler/explain.rs +++ b/src/frontend/src/handler/explain.rs @@ -22,7 +22,6 @@ use thiserror_ext::AsReport; use super::create_index::{gen_create_index_plan, resolve_index_schema}; use super::create_mv::gen_create_mv_plan; use super::create_sink::{gen_sink_plan, get_partition_compute_info}; -use super::create_table::ColumnIdGenerator; use super::query::gen_batch_plan_by_statement; use super::util::SourceSchemaCompatExt; use super::{RwPgResponse, RwPgResponseBuilderExt}; @@ -66,14 +65,11 @@ async fn do_handle_explain( wildcard_idx, .. } => { - let col_id_gen = ColumnIdGenerator::new_initial(); - let source_schema = source_schema.map(|s| s.into_v2_with_warning()); let (plan, _source, _table, _job_type) = handle_create_table_plan( handler_args, explain_options, - col_id_gen, source_schema, cdc_table_info, name.clone(), diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 7fd4f0b92822b..73b52b977c7a4 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -151,6 +151,15 @@ fn to_pg_rows( session_data: &StaticSessionData, ) -> RwResult> { assert_eq!(chunk.dimension(), column_types.len()); + if cfg!(debug_assertions) { + let chunk_data_types = chunk.data_types(); + for (ty1, ty2) in chunk_data_types.iter().zip_eq_fast(column_types) { + debug_assert!( + ty1.equals_datatype(ty2), + "chunk_data_types: {chunk_data_types:?}, column_types: {column_types:?}" + ) + } + } chunk .rows() diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs index 0310fdbbd439b..918db2919e626 100644 --- a/src/frontend/src/optimizer/plan_node/logical_source.rs +++ b/src/frontend/src/optimizer/plan_node/logical_source.rs @@ -70,6 +70,12 @@ impl LogicalSource { ctx: OptimizerContextRef, as_of: Option, ) -> Result { + // XXX: should we reorder the columns? + // The order may be strange if the schema is changed, e.g., [foo:Varchar, _rw_kafka_timestamp:Timestamptz, _row_id:Serial, bar:Int32] + // related: https://github.com/risingwavelabs/risingwave/issues/16486 + // The order does not matter much. The columns field is essentially a map indexed by the column id. + // It will affect what users will see in `SELECT *`. + // But not sure if we rely on the position of hidden column like `_row_id` somewhere. For `projected_row_id` we do so... 
let core = generic::Source { catalog: source_catalog, column_catalog, diff --git a/src/frontend/src/utils/with_options.rs b/src/frontend/src/utils/with_options.rs index 8f372b58b17ea..92c65786afdeb 100644 --- a/src/frontend/src/utils/with_options.rs +++ b/src/frontend/src/utils/with_options.rs @@ -19,7 +19,7 @@ use risingwave_connector::source::kafka::private_link::{ insert_privatelink_broker_rewrite_map, CONNECTION_NAME_KEY, PRIVATELINK_ENDPOINT_KEY, }; use risingwave_connector::WithPropertiesExt; -use risingwave_pb::catalog::PbSecretRef; +use risingwave_pb::secret::PbSecretRef; use risingwave_sqlparser::ast::{ CreateConnectionStatement, CreateSinkStatement, CreateSourceStatement, CreateSubscriptionStatement, SqlOption, Statement, Value, diff --git a/src/meta/Cargo.toml b/src/meta/Cargo.toml index 6252d845788af..ddae0c9c24626 100644 --- a/src/meta/Cargo.toml +++ b/src/meta/Cargo.toml @@ -33,6 +33,7 @@ either = "1" enum-as-inner = "0.6" etcd-client = { workspace = true } fail = "0.5" +flate2 = "1" function_name = "0.3.0" futures = { version = "0.3", default-features = false, features = ["alloc"] } hex = "0.4" diff --git a/src/meta/model_v2/migration/src/lib.rs b/src/meta/model_v2/migration/src/lib.rs index 5c68b8903b02c..254b7504feb06 100644 --- a/src/meta/model_v2/migration/src/lib.rs +++ b/src/meta/model_v2/migration/src/lib.rs @@ -13,6 +13,7 @@ mod m20240506_112555_subscription_partial_ckpt; mod m20240525_090457_secret; mod m20240617_070131_index_column_properties; mod m20240617_071625_sink_into_table_column; +mod m20240618_072634_function_compressed_binary; pub struct Migrator; @@ -30,6 +31,7 @@ impl MigratorTrait for Migrator { Box::new(m20240506_112555_subscription_partial_ckpt::Migration), Box::new(m20240525_090457_secret::Migration), Box::new(m20240617_071625_sink_into_table_column::Migration), + Box::new(m20240618_072634_function_compressed_binary::Migration), Box::new(m20240617_070131_index_column_properties::Migration), ] } diff --git a/src/meta/model_v2/migration/src/m20240618_072634_function_compressed_binary.rs b/src/meta/model_v2/migration/src/m20240618_072634_function_compressed_binary.rs new file mode 100644 index 0000000000000..6b4ef6157bcf6 --- /dev/null +++ b/src/meta/model_v2/migration/src/m20240618_072634_function_compressed_binary.rs @@ -0,0 +1,74 @@ +use sea_orm_migration::prelude::*; + +use crate::sea_orm::{DatabaseBackend, DbBackend, Statement}; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Fix mismatch column `compressed_binary` type and do data migration + match manager.get_database_backend() { + DbBackend::MySql => { + // Creating function with compressed binary will fail in previous version, so we can + // safely assume that the column is always empty and we can just modify the column type + // without any data migration. + manager + .alter_table( + Table::alter() + .table(Function::Table) + .modify_column( + ColumnDef::new(Function::CompressedBinary).blob(BlobSize::Medium), + ) + .to_owned(), + ) + .await?; + } + DbBackend::Postgres => { + manager.get_connection().execute(Statement::from_string( + DatabaseBackend::Postgres, + "ALTER TABLE function ALTER COLUMN compressed_binary TYPE bytea USING compressed_binary::bytea", + )).await?; + } + DbBackend::Sqlite => { + // Sqlite does not support modifying column type, so we need to do data migration and column renaming. 
+ // Note that: all these DDLs are not transactional, so if some of them fail, we need to manually run it again. + let conn = manager.get_connection(); + conn.execute(Statement::from_string( + DatabaseBackend::Sqlite, + "ALTER TABLE function ADD COLUMN compressed_binary_new BLOB", + )) + .await?; + conn.execute(Statement::from_string( + DatabaseBackend::Sqlite, + "UPDATE function SET compressed_binary_new = compressed_binary", + )) + .await?; + conn.execute(Statement::from_string( + DatabaseBackend::Sqlite, + "ALTER TABLE function DROP COLUMN compressed_binary", + )) + .await?; + conn.execute(Statement::from_string( + DatabaseBackend::Sqlite, + "ALTER TABLE function RENAME COLUMN compressed_binary_new TO compressed_binary", + )) + .await?; + } + } + + Ok(()) + } + + async fn down(&self, _manager: &SchemaManager) -> Result<(), DbErr> { + // DO nothing, the operations in `up` are idempotent and required to fix the column type mismatch. + Ok(()) + } +} + +#[derive(DeriveIden)] +enum Function { + Table, + CompressedBinary, +} diff --git a/src/meta/model_v2/src/lib.rs b/src/meta/model_v2/src/lib.rs index 116cb66cab1dc..751ae99b64a17 100644 --- a/src/meta/model_v2/src/lib.rs +++ b/src/meta/model_v2/src/lib.rs @@ -14,8 +14,9 @@ use std::collections::BTreeMap; -use risingwave_pb::catalog::{PbCreateType, PbSecretRef, PbStreamJobStatus}; +use risingwave_pb::catalog::{PbCreateType, PbStreamJobStatus}; use risingwave_pb::meta::table_fragments::PbState as PbStreamJobState; +use risingwave_pb::secret::PbSecretRef; use risingwave_pb::stream_plan::PbStreamNode; use sea_orm::entity::prelude::*; use sea_orm::{DeriveActiveEnum, EnumIter, FromJsonQueryResult}; diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs index 0e49f0805bf1b..b6dbdf2500109 100644 --- a/src/meta/node/src/lib.rs +++ b/src/meta/node/src/lib.rs @@ -380,6 +380,9 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { .max_trivial_move_task_count_per_loop, max_get_task_probe_times: config.meta.developer.max_get_task_probe_times, secret_store_private_key: config.meta.secret_store_private_key, + table_info_statistic_history_times: config + .storage + .table_info_statistic_history_times, }, config.system.into_init_system_params(), Default::default(), diff --git a/src/meta/src/hummock/manager/commit_epoch.rs b/src/meta/src/hummock/manager/commit_epoch.rs index 1437a6eb3bfd4..9a494bc509b4f 100644 --- a/src/meta/src/hummock/manager/commit_epoch.rs +++ b/src/meta/src/hummock/manager/commit_epoch.rs @@ -34,7 +34,6 @@ use crate::hummock::manager::transaction::{ HummockVersionStatsTransaction, HummockVersionTransaction, }; use crate::hummock::manager::versioning::Versioning; -use crate::hummock::manager::HISTORY_TABLE_INFO_STATISTIC_TIME; use crate::hummock::metrics_utils::{ get_or_create_local_table_stat, trigger_local_table_stat, trigger_sst_stat, }; @@ -482,7 +481,7 @@ impl HummockManager { let throughput = (stat.total_value_size + stat.total_key_size) as u64; let entry = table_infos.entry(table_id).or_default(); entry.push_back(throughput); - if entry.len() > HISTORY_TABLE_INFO_STATISTIC_TIME { + if entry.len() > self.env.opts.table_info_statistic_history_times { entry.pop_front(); } } diff --git a/src/meta/src/hummock/manager/compaction.rs b/src/meta/src/hummock/manager/compaction.rs index e406292edef2f..906824c155f7a 100644 --- a/src/meta/src/hummock/manager/compaction.rs +++ b/src/meta/src/hummock/manager/compaction.rs @@ -27,7 +27,7 @@ // limitations under the License. 
use std::cmp::min; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::sync::{Arc, LazyLock}; use std::time::{Instant, SystemTime}; @@ -42,6 +42,7 @@ use rand::seq::SliceRandom; use rand::thread_rng; use risingwave_common::util::epoch::Epoch; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockLevelsExt; +use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::table_stats::{ add_prost_table_stats_map, purge_prost_table_stats, PbTableStatsMap, }; @@ -469,9 +470,9 @@ impl HummockManager { let cancel_tasks = compactor_manager.update_task_heartbeats(&progress).into_iter().map(|task|task.task_id).collect::>(); if !cancel_tasks.is_empty() { tracing::info!( - "Tasks cancel with task_ids {:?} with context_id {} has expired due to lack of visible progress", - cancel_tasks, + ?cancel_tasks, context_id, + "Tasks cancel has expired due to lack of visible progress", ); if let Err(e) = hummock_manager @@ -490,12 +491,14 @@ impl HummockManager { // Forcefully cancel the task so that it terminates // early on the compactor // node. - let _ = compactor.cancel_tasks(&cancel_tasks); - tracing::info!( - "CancelTask operation for task_id {:?} has been sent to node with context_id {}", - cancel_tasks, - context_id - ); + if !cancel_tasks.is_empty() { + let _ = compactor.cancel_tasks(&cancel_tasks); + tracing::info!( + ?cancel_tasks, + context_id, + "CancelTask operation has been sent to compactor node", + ); + } } else { // Determine the validity of the compactor streaming rpc. When the compactor no longer exists in the manager, the stream will be removed. // Tip: Connectivity to the compactor will be determined through the `send_event` operation. When send fails, it will be removed from the manager @@ -515,7 +518,7 @@ impl HummockManager { if compactor_alive { push_stream(context_id, stream, &mut compactor_request_streams); } else { - tracing::warn!("compactor stream {} error, send stream may be destroyed", context_id); + tracing::warn!(context_id, "compactor stream error, send stream may be destroyed"); } }, } @@ -1557,6 +1560,80 @@ impl HummockManager { .retain(|table_id, _| compact_task.existing_table_ids.contains(table_id)); } } + + pub async fn try_move_table_to_dedicated_cg( + &self, + table_write_throughput: &HashMap>, + table_id: &u32, + table_size: &u64, + is_creating_table: bool, + checkpoint_secs: u64, + parent_group_id: u64, + group_size: u64, + ) { + let default_group_id: CompactionGroupId = StaticCompactionGroupId::StateDefault.into(); + let mv_group_id: CompactionGroupId = StaticCompactionGroupId::MaterializedView.into(); + let partition_vnode_count = self.env.opts.partition_vnode_count; + let window_size = + self.env.opts.table_info_statistic_history_times / (checkpoint_secs as usize); + + let mut is_high_write_throughput = false; + let mut is_low_write_throughput = true; + if let Some(history) = table_write_throughput.get(table_id) { + if history.len() >= window_size { + is_high_write_throughput = history.iter().all(|throughput| { + *throughput / checkpoint_secs > self.env.opts.table_write_throughput_threshold + }); + is_low_write_throughput = history.iter().any(|throughput| { + *throughput / checkpoint_secs < self.env.opts.min_table_split_write_throughput + }); + } + } + + let state_table_size = *table_size; + + // 1. Avoid splitting a creating table + // 2. Avoid splitting a is_low_write_throughput creating table + // 3. 
Avoid splitting a non-high throughput medium-sized table + if is_creating_table + || (is_low_write_throughput) + || (state_table_size < self.env.opts.min_table_split_size && !is_high_write_throughput) + { + return; + } + + // do not split a large table and a small table because it would increase IOPS + // of small table. + if parent_group_id != default_group_id && parent_group_id != mv_group_id { + let rest_group_size = group_size - state_table_size; + if rest_group_size < state_table_size + && rest_group_size < self.env.opts.min_table_split_size + { + return; + } + } + + let ret = self + .move_state_table_to_compaction_group( + parent_group_id, + &[*table_id], + partition_vnode_count, + ) + .await; + match ret { + Ok(new_group_id) => { + tracing::info!("move state table [{}] from group-{} to group-{} success table_vnode_partition_count {:?}", table_id, parent_group_id, new_group_id, partition_vnode_count); + } + Err(e) => { + tracing::info!( + error = %e.as_report(), + "failed to move state table [{}] from group-{}", + table_id, + parent_group_id, + ) + } + } + } } #[cfg(any(test, feature = "test"))] diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index 8a49d91a55fc3..e3511d1e4b204 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -72,7 +72,6 @@ pub use compaction::{check_cg_write_limit, WriteLimitType}; pub(crate) use utils::*; type Snapshot = ArcSwap; -const HISTORY_TABLE_INFO_STATISTIC_TIME: usize = 240; // Update to states are performed as follow: // - Initialize ValTransaction for the meta state to update diff --git a/src/meta/src/hummock/manager/timer_task.rs b/src/meta/src/hummock/manager/timer_task.rs index b7c8cae4b260e..bb4a9fa86b06c 100644 --- a/src/meta/src/hummock/manager/timer_task.rs +++ b/src/meta/src/hummock/manager/timer_task.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
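For the split heuristics in `try_move_table_to_dedicated_cg` above, the window flags fall out of the per-table throughput history as sketched below. The threshold values are illustrative only; the real ones come from `MetaOpts` (`table_write_throughput_threshold`, `min_table_split_write_throughput`), and the window size is `table_info_statistic_history_times / checkpoint_secs`.

```rust
use std::collections::VecDeque;

fn classify(
    history: &VecDeque<u64>, // per-checkpoint (key + value) bytes written for one table
    checkpoint_secs: u64,
    window_size: usize,      // table_info_statistic_history_times / checkpoint_secs
    high_threshold: u64,     // stand-in for opts.table_write_throughput_threshold
    low_threshold: u64,      // stand-in for opts.min_table_split_write_throughput
) -> (bool, bool) {
    let mut is_high = false;
    let mut is_low = true;
    // Only a full window of history can mark a table as high-throughput.
    if history.len() >= window_size {
        is_high = history.iter().all(|t| t / checkpoint_secs > high_threshold);
        is_low = history.iter().any(|t| t / checkpoint_secs < low_threshold);
    }
    (is_high, is_low)
}

fn main() {
    // With checkpoint_secs = 1 and the default 240 samples, every sample must
    // exceed the high threshold before the table becomes a split candidate.
    let history: VecDeque<u64> = std::iter::repeat(64 << 20).take(240).collect();
    let (is_high, is_low) = classify(&history, 1, 240, 16 << 20, 4 << 20);
    assert!(is_high && !is_low);
}
```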
-use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::time::Duration; @@ -22,8 +22,6 @@ use futures::{FutureExt, StreamExt}; use itertools::Itertools; use risingwave_common::system_param::reader::SystemParamsRead; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::get_compaction_group_ids; -use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; -use risingwave_hummock_sdk::CompactionGroupId; use risingwave_pb::hummock::compact_task::{self, TaskStatus}; use risingwave_pb::hummock::level_handler::RunningCompactTask; use rw_futures_util::select_all; @@ -33,7 +31,6 @@ use tokio::task::JoinHandle; use tokio_stream::wrappers::IntervalStream; use tracing::warn; -use crate::hummock::manager::HISTORY_TABLE_INFO_STATISTIC_TIME; use crate::hummock::metrics_utils::{trigger_lsm_stat, trigger_mv_stat}; use crate::hummock::{HummockManager, TASK_NORMAL}; @@ -468,7 +465,7 @@ impl HummockManager { } for (table_id, table_size) in &group.table_statistic { - self.calculate_table_align_rule( + self.try_move_table_to_dedicated_cg( &table_write_throughput, table_id, table_size, @@ -494,93 +491,4 @@ impl HummockManager { } } } - - async fn calculate_table_align_rule( - &self, - table_write_throughput: &HashMap>, - table_id: &u32, - table_size: &u64, - is_creating_table: bool, - checkpoint_secs: u64, - parent_group_id: u64, - group_size: u64, - ) { - let default_group_id: CompactionGroupId = StaticCompactionGroupId::StateDefault.into(); - let mv_group_id: CompactionGroupId = StaticCompactionGroupId::MaterializedView.into(); - let partition_vnode_count = self.env.opts.partition_vnode_count; - let window_size = HISTORY_TABLE_INFO_STATISTIC_TIME / (checkpoint_secs as usize); - - let mut is_high_write_throughput = false; - let mut is_low_write_throughput = true; - if let Some(history) = table_write_throughput.get(table_id) { - if !is_creating_table { - if history.len() >= window_size { - is_high_write_throughput = history.iter().all(|throughput| { - *throughput / checkpoint_secs - > self.env.opts.table_write_throughput_threshold - }); - is_low_write_throughput = history.iter().any(|throughput| { - *throughput / checkpoint_secs - < self.env.opts.min_table_split_write_throughput - }); - } - } else { - // For creating table, relax the checking restrictions to make the data alignment behavior more sensitive. - let sum = history.iter().sum::(); - is_low_write_throughput = sum - < self.env.opts.min_table_split_write_throughput - * history.len() as u64 - * checkpoint_secs; - } - } - - let state_table_size = *table_size; - - // 1. Avoid splitting a creating table - // 2. Avoid splitting a is_low_write_throughput creating table - // 3. Avoid splitting a non-high throughput medium-sized table - if is_creating_table - || (is_low_write_throughput) - || (state_table_size < self.env.opts.min_table_split_size && !is_high_write_throughput) - { - return; - } - - // do not split a large table and a small table because it would increase IOPS - // of small table. 
- if parent_group_id != default_group_id && parent_group_id != mv_group_id { - let rest_group_size = group_size - state_table_size; - if rest_group_size < state_table_size - && rest_group_size < self.env.opts.min_table_split_size - { - return; - } - } - - let ret = self - .move_state_table_to_compaction_group( - parent_group_id, - &[*table_id], - partition_vnode_count, - ) - .await; - match ret { - Ok(new_group_id) => { - tracing::info!( - "move state table [{}] from group-{} to group-{} success", - table_id, - parent_group_id, - new_group_id - ); - } - Err(e) => { - tracing::info!( - error = %e.as_report(), - "failed to move state table [{}] from group-{}", - table_id, - parent_group_id, - ) - } - } - } } diff --git a/src/meta/src/hummock/model/compact_task_assignment.rs b/src/meta/src/hummock/model/compact_task_assignment.rs index e8b9402680795..74fdf6e84dadd 100644 --- a/src/meta/src/hummock/model/compact_task_assignment.rs +++ b/src/meta/src/hummock/model/compact_task_assignment.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use prost::Message; use risingwave_hummock_sdk::HummockCompactionTaskId; use risingwave_pb::hummock::CompactTaskAssignment; @@ -32,10 +31,6 @@ impl MetadataModel for CompactTaskAssignment { self.clone() } - fn to_protobuf_encoded_vec(&self) -> Vec { - self.encode_to_vec() - } - fn from_protobuf(prost: Self::PbType) -> Self { prost } diff --git a/src/meta/src/hummock/model/pinned_snapshot.rs b/src/meta/src/hummock/model/pinned_snapshot.rs index c7db58719f46c..f485d9dab7211 100644 --- a/src/meta/src/hummock/model/pinned_snapshot.rs +++ b/src/meta/src/hummock/model/pinned_snapshot.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use prost::Message; use risingwave_hummock_sdk::HummockContextId; use risingwave_pb::hummock::HummockPinnedSnapshot; @@ -32,10 +31,6 @@ impl MetadataModel for HummockPinnedSnapshot { self.clone() } - fn to_protobuf_encoded_vec(&self) -> Vec { - self.encode_to_vec() - } - fn from_protobuf(prost: Self::PbType) -> Self { prost } diff --git a/src/meta/src/hummock/model/pinned_version.rs b/src/meta/src/hummock/model/pinned_version.rs index 1b92bc300c797..e8f6b2e65e75e 100644 --- a/src/meta/src/hummock/model/pinned_version.rs +++ b/src/meta/src/hummock/model/pinned_version.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use prost::Message; use risingwave_hummock_sdk::HummockContextId; use risingwave_pb::hummock::HummockPinnedVersion; @@ -32,10 +31,6 @@ impl MetadataModel for HummockPinnedVersion { self.clone() } - fn to_protobuf_encoded_vec(&self) -> Vec { - self.encode_to_vec() - } - fn from_protobuf(prost: Self::PbType) -> Self { prost } diff --git a/src/meta/src/hummock/model/version_delta.rs b/src/meta/src/hummock/model/version_delta.rs index 34cadc675da7a..1a87b9d456989 100644 --- a/src/meta/src/hummock/model/version_delta.rs +++ b/src/meta/src/hummock/model/version_delta.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use prost::Message; use risingwave_hummock_sdk::version::HummockVersionDelta; use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::hummock::PbHummockVersionDelta; @@ -33,10 +32,6 @@ impl MetadataModel for HummockVersionDelta { self.to_protobuf() } - fn to_protobuf_encoded_vec(&self) -> Vec { - self.to_protobuf().encode_to_vec() - } - fn from_protobuf(prost: Self::PbType) -> Self { Self::from_persisted_protobuf(&prost) } diff --git a/src/meta/src/hummock/model/version_stats.rs b/src/meta/src/hummock/model/version_stats.rs index e6ee772e3de9e..512adca422bd5 100644 --- a/src/meta/src/hummock/model/version_stats.rs +++ b/src/meta/src/hummock/model/version_stats.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use prost::Message; use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::hummock::HummockVersionStats; @@ -33,10 +32,6 @@ impl MetadataModel for HummockVersionStats { self.clone() } - fn to_protobuf_encoded_vec(&self) -> Vec { - self.encode_to_vec() - } - fn from_protobuf(prost: Self::PbType) -> Self { prost } diff --git a/src/meta/src/manager/env.rs b/src/meta/src/manager/env.rs index 7a284ca2fccd4..e10d64a656328 100644 --- a/src/meta/src/manager/env.rs +++ b/src/meta/src/manager/env.rs @@ -284,6 +284,8 @@ pub struct MetaOpts { // The private key for the secret store, used when the secret is stored in the meta. pub secret_store_private_key: Vec, + + pub table_info_statistic_history_times: usize, } impl MetaOpts { @@ -345,6 +347,7 @@ impl MetaOpts { max_trivial_move_task_count_per_loop: 256, max_get_task_probe_times: 5, secret_store_private_key: "demo-secret-private-key".as_bytes().to_vec(), + table_info_statistic_history_times: 240, } } } diff --git a/src/meta/src/model/mod.rs b/src/meta/src/model/mod.rs index e87251ee6d413..1ca22854c5def 100644 --- a/src/meta/src/model/mod.rs +++ b/src/meta/src/model/mod.rs @@ -23,14 +23,15 @@ mod user; use std::collections::btree_map::{Entry, VacantEntry}; use std::collections::BTreeMap; use std::fmt::Debug; +use std::io::{Read, Write}; use std::ops::{Deref, DerefMut}; +use anyhow::Context as _; use async_trait::async_trait; pub use cluster::*; pub use error::*; pub use migration_plan::*; pub use notification::*; -use prost::Message; pub use stream::*; use crate::storage::{MetaStore, MetaStoreError, Snapshot, Transaction}; @@ -57,13 +58,23 @@ mod private { pub trait MetadataModelMarker {} } +/// Compress the value if it's larger then the threshold to avoid hitting the limit of etcd. +/// +/// By default, the maximum size of any request to etcd is 1.5 MB. So we use a slightly +/// smaller value here. However, note that this is still a best-effort approach, as the +/// compressed size may still exceed the limit, in which case we should set the parameter +/// `--max-request-bytes` of etcd to a larger value. +const MODEL_COMPRESSION_THRESHOLD: usize = 1 << 20; + /// `MetadataModel` defines basic model operations in CRUD. +// TODO: better to move the methods that we don't want implementors to override to a separate +// extension trait. #[async_trait] pub trait MetadataModel: std::fmt::Debug + Sized + private::MetadataModelMarker { /// Serialized prost message type. - type PbType: Message + Default; + type PbType: prost::Message + Default; /// Serialized key type. - type KeyType: Message; + type KeyType: prost::Message; /// Column family for this model. 
fn cf_name() -> String; @@ -71,17 +82,59 @@ pub trait MetadataModel: std::fmt::Debug + Sized + private::MetadataModelMarker /// Serialize to protobuf. fn to_protobuf(&self) -> Self::PbType; - /// Serialize to protobuf encoded byte vector. - fn to_protobuf_encoded_vec(&self) -> Vec { - self.to_protobuf().encode_to_vec() - } - /// Deserialize from protobuf. fn from_protobuf(prost: Self::PbType) -> Self; /// Current record key. fn key(&self) -> MetadataModelResult; + /// Encode key to bytes. Should not be overridden. + fn encode_key(key: &Self::KeyType) -> Vec { + use prost::Message; + key.encode_to_vec() + } + + /// Encode value to bytes. Should not be overridden. + fn encode_value(value: &Self::PbType) -> Vec { + use flate2::write::GzEncoder; + use flate2::Compression; + use prost::Message; + + let pb_encoded = value.encode_to_vec(); + + // Compress the value if it's larger then the threshold to avoid hitting the limit of etcd. + if pb_encoded.len() > MODEL_COMPRESSION_THRESHOLD { + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(&pb_encoded).unwrap(); + encoder.finish().unwrap() + } else { + pb_encoded + } + } + + /// Decode value from bytes. Should not be overridden. + fn decode_value(value: &[u8]) -> MetadataModelResult { + use flate2::bufread::GzDecoder; + use prost::Message; + + let mut decoder = GzDecoder::new(value); + let mut buf = Vec::new(); + + // If the value is compressed, decode it. + // This works because a protobuf-encoded message is never a valid gzip stream. + // https://stackoverflow.com/questions/63621784/can-a-protobuf-message-begin-with-a-gzip-magic-number + let value = if decoder.header().is_some() { + decoder + .read_to_end(&mut buf) + .context("failed to decode gzipped value")?; + buf.as_slice() + } else { + value + }; + + Self::PbType::decode(value).map_err(Into::into) + } + /// `list` returns all records in this model. 
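The gzip-or-plain detection in `decode_value` relies on a protobuf payload never starting with the gzip magic bytes. Here is a minimal round-trip sketch with `flate2` (the same crate this change adds to `src/meta`), kept independent of the `MetadataModel` trait; the threshold constant simply mirrors `MODEL_COMPRESSION_THRESHOLD`.

```rust
use std::io::{Read, Write};

use flate2::bufread::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;

const THRESHOLD: usize = 1 << 20; // mirror of MODEL_COMPRESSION_THRESHOLD

fn encode(raw: &[u8]) -> Vec<u8> {
    if raw.len() > THRESHOLD {
        let mut enc = GzEncoder::new(Vec::new(), Compression::default());
        enc.write_all(raw).unwrap();
        enc.finish().unwrap()
    } else {
        raw.to_vec()
    }
}

fn decode(stored: &[u8]) -> Vec<u8> {
    let mut dec = GzDecoder::new(stored);
    // `header()` is only `Some` for a valid gzip stream; a plain payload never is.
    if dec.header().is_some() {
        let mut buf = Vec::new();
        dec.read_to_end(&mut buf).unwrap();
        buf
    } else {
        stored.to_vec()
    }
}

fn main() {
    let small = vec![b'x'; 16];
    let large = vec![b'x'; THRESHOLD + 1];
    assert_eq!(decode(&encode(&small)), small); // stored as-is
    assert_eq!(decode(&encode(&large)), large); // stored gzipped, transparently decoded
}
```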
async fn list(store: &S) -> MetadataModelResult> where @@ -90,11 +143,7 @@ pub trait MetadataModel: std::fmt::Debug + Sized + private::MetadataModelMarker let bytes_vec = store.list_cf(&Self::cf_name()).await?; bytes_vec .iter() - .map(|(_k, v)| { - Self::PbType::decode(v.as_slice()) - .map(Self::from_protobuf) - .map_err(Into::into) - }) + .map(|(_k, v)| Self::decode_value(v.as_slice()).map(Self::from_protobuf)) .collect() } @@ -105,11 +154,7 @@ pub trait MetadataModel: std::fmt::Debug + Sized + private::MetadataModelMarker let bytes_vec = snapshot.list_cf(&Self::cf_name()).await?; bytes_vec .iter() - .map(|(_k, v)| { - Self::PbType::decode(v.as_slice()) - .map(Self::from_protobuf) - .map_err(Into::into) - }) + .map(|(_k, v)| Self::decode_value(v.as_slice()).map(Self::from_protobuf)) .collect() } @@ -121,8 +166,8 @@ pub trait MetadataModel: std::fmt::Debug + Sized + private::MetadataModelMarker store .put_cf( &Self::cf_name(), - self.key()?.encode_to_vec(), - self.to_protobuf().encode_to_vec(), + Self::encode_key(&self.key()?), + Self::encode_value(&self.to_protobuf()), ) .await .map_err(Into::into) @@ -134,7 +179,7 @@ pub trait MetadataModel: std::fmt::Debug + Sized + private::MetadataModelMarker S: MetaStore, { store - .delete_cf(&Self::cf_name(), &key.encode_to_vec()) + .delete_cf(&Self::cf_name(), &Self::encode_key(key)) .await .map_err(Into::into) } @@ -144,7 +189,7 @@ pub trait MetadataModel: std::fmt::Debug + Sized + private::MetadataModelMarker where S: MetaStore, { - let byte_vec = match store.get_cf(&Self::cf_name(), &key.encode_to_vec()).await { + let byte_vec = match store.get_cf(&Self::cf_name(), &Self::encode_key(key)).await { Ok(byte_vec) => byte_vec, Err(err) => { if !matches!(err, MetaStoreError::ItemNotFound(_)) { @@ -153,7 +198,7 @@ pub trait MetadataModel: std::fmt::Debug + Sized + private::MetadataModelMarker return Ok(None); } }; - let model = Self::from_protobuf(Self::PbType::decode(byte_vec.as_slice())?); + let model = Self::from_protobuf(Self::decode_value(byte_vec.as_slice())?); Ok(Some(model)) } } @@ -209,14 +254,14 @@ where async fn upsert_in_transaction(&self, trx: &mut Transaction) -> MetadataModelResult<()> { trx.put( Self::cf_name(), - self.key()?.encode_to_vec(), - self.to_protobuf_encoded_vec(), + Self::encode_key(&self.key()?), + Self::encode_value(&self.to_protobuf()), ); Ok(()) } async fn delete_in_transaction(&self, trx: &mut Transaction) -> MetadataModelResult<()> { - trx.delete(Self::cf_name(), self.key()?.encode_to_vec()); + trx.delete(Self::cf_name(), Self::encode_key(&self.key()?)); Ok(()) } } @@ -653,6 +698,8 @@ impl<'a, K: Ord, V: PartialEq + Transactional, TXN> ValTransaction #[cfg(test)] mod tests { + use itertools::Itertools; + use super::*; use crate::storage::Operation; @@ -681,6 +728,53 @@ mod tests { } } + #[tokio::test] + async fn test_compress_decompress() { + use prost::Message; + use risingwave_pb::catalog::Database; + + use crate::storage::MemStore; + + async fn do_test(len: usize) { + // Use `Database` as a test model. + type Model = Database; + + let store = MemStore::new(); + let model = Model { + name: "t".repeat(len), + ..Default::default() + }; + { + let encoded_len = model.encoded_len(); + // Showing that the encoded length is larger than the original length. + // So that a len greater than the threshold will hit the compression branch. 
+ assert!(encoded_len >= len, "encoded_len: {encoded_len}, len: {len}"); + } + model.insert(&store).await.unwrap(); + + // Test `list` + let decoded = Model::list(&store) + .await + .unwrap() + .into_iter() + .exactly_one() + .unwrap(); + assert_eq!(model, decoded); + + // Test `select` + let decoded = Model::select(&store, &model.key().unwrap()) + .await + .unwrap() + .into_iter() + .exactly_one() + .unwrap(); + assert_eq!(model, decoded); + } + + do_test(1).await; + do_test(MODEL_COMPRESSION_THRESHOLD + 1).await; + } + #[tokio::test] async fn test_simple_var_transaction_commit() { let mut kv = TestTransactional { diff --git a/src/object_store/Cargo.toml b/src/object_store/Cargo.toml index 43a812081a36c..4fee5f953df5c 100644 --- a/src/object_store/Cargo.toml +++ b/src/object_store/Cargo.toml @@ -31,9 +31,21 @@ hyper-rustls = { version = "0.24.2", features = ["webpki-roots"] } hyper-tls = "0.5.0" itertools = { workspace = true } madsim = "0.2.27" -opendal = "0.45.1" +opendal = { version = "0.47", features = [ + "executors-tokio", + "services-azblob", + "services-fs", + "services-gcs", + "services-memory", + "services-obs", + "services-oss", + "services-s3", + "services-webhdfs", + "services-azfile", + # "service-hdfs", +] } prometheus = { version = "0.13", features = ["process"] } -reqwest = "0.11" # required by opendal +reqwest = "0.12.2" # required by opendal risingwave_common = { workspace = true } rustls = "0.23.5" spin = "0.9" diff --git a/src/object_store/src/object/mod.rs b/src/object_store/src/object/mod.rs index e5e9cb9661262..c701a438253ae 100644 --- a/src/object_store/src/object/mod.rs +++ b/src/object_store/src/object/mod.rs @@ -366,20 +366,14 @@ pub struct MonitoredStreamingUploader { object_store_metrics: Arc, /// Length of data uploaded with this uploader. operation_size: usize, - media_type: &'static str, } impl MonitoredStreamingUploader { - pub fn new( - media_type: &'static str, - handle: U, - object_store_metrics: Arc, - ) -> Self { + pub fn new(handle: U, object_store_metrics: Arc) -> Self { Self { inner: handle, object_store_metrics, operation_size: 0, - media_type, } } } @@ -392,27 +386,16 @@ impl MonitoredStreamingUploader { let operation_type_str = operation_type.as_str(); let data_len = data.len(); - let res = if self.media_type == "s3" { + let res = // TODO: we should avoid this special case after fully migrating to opeandal for s3. self.inner .write_bytes(data) .verbose_instrument_await(operation_type_str) - .await - } else { - let _timer = self - .object_store_metrics - .operation_latency - .with_label_values(&[self.media_type, operation_type_str]) - .start_timer(); - - self.inner - .write_bytes(data) - .verbose_instrument_await(operation_type_str) - .await - }; + .await; try_update_failure_metric(&self.object_store_metrics, &res, operation_type_str); + // duration metrics is collected and reported inside the specific implementation of the streaming uploader. self.object_store_metrics .write_bytes .inc_by(data_len as u64); @@ -429,26 +412,16 @@ impl MonitoredStreamingUploader { let operation_type = OperationType::StreamingUploadFinish; let operation_type_str = operation_type.as_str(); - let res = if self.media_type == "s3" { + let res = // TODO: we should avoid this special case after fully migrating to opeandal for s3. 
self.inner .finish() .verbose_instrument_await(operation_type_str) - .await - } else { - let _timer = self - .object_store_metrics - .operation_latency - .with_label_values(&[self.media_type, operation_type_str]) - .start_timer(); - - self.inner - .finish() - .verbose_instrument_await(operation_type_str) - .await - }; + .await; try_update_failure_metric(&self.object_store_metrics, &res, operation_type_str); + + // duration metrics is collected and reported inside the specific implementation of the streaming uploader. self.object_store_metrics .operation_size .with_label_values(&[operation_type_str]) @@ -641,7 +614,6 @@ impl MonitoredObjectStore { try_update_failure_metric(&self.object_store_metrics, &res, operation_type_str); Ok(MonitoredStreamingUploader::new( - media_type, res?, self.object_store_metrics.clone(), )) @@ -868,9 +840,13 @@ pub async fn build_remote_object_store( let bucket = s3.strip_prefix("s3://").unwrap(); tracing::info!("Using OpenDAL to access s3, bucket is {}", bucket); ObjectStoreImpl::Opendal( - OpendalObjectStore::new_s3_engine(bucket.to_string(), config.clone()) - .unwrap() - .monitored(metrics, config), + OpendalObjectStore::new_s3_engine( + bucket.to_string(), + config.clone(), + metrics.clone(), + ) + .unwrap() + .monitored(metrics, config), ) } else { ObjectStoreImpl::S3( @@ -893,6 +869,7 @@ pub async fn build_remote_object_store( namenode.to_string(), root.to_string(), config.clone(), + metrics.clone(), ) .unwrap() .monitored(metrics, config), @@ -906,6 +883,7 @@ pub async fn build_remote_object_store( bucket.to_string(), root.to_string(), config.clone(), + metrics.clone(), ) .unwrap() .monitored(metrics, config), @@ -919,6 +897,7 @@ pub async fn build_remote_object_store( bucket.to_string(), root.to_string(), config.clone(), + metrics.clone(), ) .unwrap() .monitored(metrics, config), @@ -933,6 +912,7 @@ pub async fn build_remote_object_store( bucket.to_string(), root.to_string(), config.clone(), + metrics.clone(), ) .unwrap() .monitored(metrics, config), @@ -946,6 +926,7 @@ pub async fn build_remote_object_store( namenode.to_string(), root.to_string(), config.clone(), + metrics.clone(), ) .unwrap() .monitored(metrics, config), @@ -959,6 +940,7 @@ pub async fn build_remote_object_store( container_name.to_string(), root.to_string(), config.clone(), + metrics.clone(), ) .unwrap() .monitored(metrics, config), @@ -967,7 +949,7 @@ pub async fn build_remote_object_store( fs if fs.starts_with("fs://") => { let fs = fs.strip_prefix("fs://").unwrap(); ObjectStoreImpl::Opendal( - OpendalObjectStore::new_fs_engine(fs.to_string(), config.clone()) + OpendalObjectStore::new_fs_engine(fs.to_string(), config.clone(), metrics.clone()) .unwrap() .monitored(metrics, config), ) @@ -983,7 +965,7 @@ pub async fn build_remote_object_store( if config.s3.developer.use_opendal { tracing::info!("Using OpenDAL to access minio."); ObjectStoreImpl::Opendal( - OpendalObjectStore::new_minio_engine(minio, config.clone()) + OpendalObjectStore::new_minio_engine(minio, config.clone(), metrics.clone()) .unwrap() .monitored(metrics, config), ) diff --git a/src/object_store/src/object/opendal_engine/azblob.rs b/src/object_store/src/object/opendal_engine/azblob.rs index 590859eaaa706..e584e59aafe8b 100644 --- a/src/object_store/src/object/opendal_engine/azblob.rs +++ b/src/object_store/src/object/opendal_engine/azblob.rs @@ -20,6 +20,7 @@ use opendal::Operator; use risingwave_common::config::ObjectStoreConfig; use super::{EngineType, OpendalObjectStore}; +use 
crate::object::object_metrics::ObjectStoreMetrics; use crate::object::ObjectResult; const AZBLOB_ENDPOINT: &str = "AZBLOB_ENDPOINT"; @@ -29,6 +30,7 @@ impl OpendalObjectStore { container_name: String, root: String, config: Arc, + metrics: Arc, ) -> ObjectResult { // Create azblob backend builder. let mut builder = Azblob::default(); @@ -47,6 +49,7 @@ impl OpendalObjectStore { op, engine_type: EngineType::Azblob, config, + metrics, }) } } diff --git a/src/object_store/src/object/opendal_engine/fs.rs b/src/object_store/src/object/opendal_engine/fs.rs index ecb1131f0def8..2edaaa44d6bbe 100644 --- a/src/object_store/src/object/opendal_engine/fs.rs +++ b/src/object_store/src/object/opendal_engine/fs.rs @@ -20,12 +20,17 @@ use opendal::Operator; use risingwave_common::config::ObjectStoreConfig; use super::{EngineType, OpendalObjectStore}; +use crate::object::object_metrics::ObjectStoreMetrics; use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; impl OpendalObjectStore { /// create opendal fs engine. - pub fn new_fs_engine(root: String, config: Arc) -> ObjectResult { + pub fn new_fs_engine( + root: String, + config: Arc, + metrics: Arc, + ) -> ObjectResult { // Create fs backend builder. let mut builder = Fs::default(); builder.root(&root); @@ -41,6 +46,7 @@ impl OpendalObjectStore { op, engine_type: EngineType::Fs, config, + metrics, }) } } diff --git a/src/object_store/src/object/opendal_engine/gcs.rs b/src/object_store/src/object/opendal_engine/gcs.rs index c55de2377202e..a3876b30ef564 100644 --- a/src/object_store/src/object/opendal_engine/gcs.rs +++ b/src/object_store/src/object/opendal_engine/gcs.rs @@ -20,6 +20,7 @@ use opendal::Operator; use risingwave_common::config::ObjectStoreConfig; use super::{EngineType, OpendalObjectStore}; +use crate::object::object_metrics::ObjectStoreMetrics; use crate::object::ObjectResult; impl OpendalObjectStore { @@ -28,6 +29,7 @@ impl OpendalObjectStore { bucket: String, root: String, config: Arc, + metrics: Arc, ) -> ObjectResult { // Create gcs backend builder. let mut builder = Gcs::default(); @@ -49,6 +51,7 @@ impl OpendalObjectStore { op, engine_type: EngineType::Gcs, config, + metrics, }) } } diff --git a/src/object_store/src/object/opendal_engine/hdfs.rs b/src/object_store/src/object/opendal_engine/hdfs.rs index 8c1e16eda1f57..28c4cf33b51b0 100644 --- a/src/object_store/src/object/opendal_engine/hdfs.rs +++ b/src/object_store/src/object/opendal_engine/hdfs.rs @@ -18,6 +18,7 @@ use opendal::Operator; use risingwave_common::config::ObjectStoreConfig; use super::{EngineType, OpendalObjectStore}; +use crate::object::object_metrics::ObjectStoreMetrics; use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; @@ -26,7 +27,8 @@ impl OpendalObjectStore { pub fn new_hdfs_engine( namenode: String, root: String, - config: ObjectStoreConfig, + config: Arc, + metrics: Arc, ) -> ObjectResult { // Create hdfs backend builder. 
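For reference, every OpenDAL engine touched by this patch now follows the same constructor shape: build the service-specific builder, turn it into an `Operator`, and keep both the shared `ObjectStoreConfig` and the new `Arc<ObjectStoreMetrics>` on the returned store so the streaming uploader can report its own latency. The sketch below illustrates that shape with the in-memory service only because it needs no external setup; the function name `new_memory_engine_sketch` is hypothetical and not part of the patch, and it assumes the sketch lives alongside the other engine modules so `super::{EngineType, OpendalObjectStore}` is in scope.

    use std::sync::Arc;

    use opendal::services::Memory;
    use opendal::Operator;
    use risingwave_common::config::ObjectStoreConfig;

    use super::{EngineType, OpendalObjectStore};
    use crate::object::object_metrics::ObjectStoreMetrics;
    use crate::object::ObjectResult;

    impl OpendalObjectStore {
        /// Hypothetical example of the constructor shape shared by the engines in this patch.
        pub fn new_memory_engine_sketch(
            config: Arc<ObjectStoreConfig>,
            metrics: Arc<ObjectStoreMetrics>,
        ) -> ObjectResult<Self> {
            // Build the service-specific backend, then finish it into an `Operator`.
            let builder = Memory::default();
            let op: Operator = Operator::new(builder)?.finish();
            // Keep the config and the metrics handle on the store, mirroring the patch.
            Ok(Self {
                op,
                engine_type: EngineType::Memory,
                config,
                metrics,
            })
        }
    }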
let mut builder = Hdfs::default(); @@ -43,6 +45,8 @@ impl OpendalObjectStore { Ok(Self { op, engine_type: EngineType::Hdfs, + config, + metrics, }) } } diff --git a/src/object_store/src/object/opendal_engine/obs.rs b/src/object_store/src/object/opendal_engine/obs.rs index 77178ca9ae7bc..03919ec57d37c 100644 --- a/src/object_store/src/object/opendal_engine/obs.rs +++ b/src/object_store/src/object/opendal_engine/obs.rs @@ -20,6 +20,7 @@ use opendal::Operator; use risingwave_common::config::ObjectStoreConfig; use super::{EngineType, OpendalObjectStore}; +use crate::object::object_metrics::ObjectStoreMetrics; use crate::object::ObjectResult; impl OpendalObjectStore { @@ -28,6 +29,7 @@ impl OpendalObjectStore { bucket: String, root: String, config: Arc, + metrics: Arc, ) -> ObjectResult { // Create obs backend builder. let mut builder = Obs::default(); @@ -55,6 +57,7 @@ impl OpendalObjectStore { op, engine_type: EngineType::Obs, config, + metrics, }) } } diff --git a/src/object_store/src/object/opendal_engine/opendal_object_store.rs b/src/object_store/src/object/opendal_engine/opendal_object_store.rs index 0d946e95d43f1..6ea0cbb6fe8f0 100644 --- a/src/object_store/src/object/opendal_engine/opendal_object_store.rs +++ b/src/object_store/src/object/opendal_engine/opendal_object_store.rs @@ -18,17 +18,19 @@ use std::time::Duration; use bytes::Bytes; use fail::fail_point; -use futures::{stream, StreamExt, TryStreamExt}; +use futures::{stream, StreamExt}; use opendal::layers::{RetryLayer, TimeoutLayer}; +use opendal::raw::BoxedStaticFuture; use opendal::services::Memory; -use opendal::{Metakey, Operator, Writer}; +use opendal::{Execute, Executor, Metakey, Operator, Writer}; use risingwave_common::config::ObjectStoreConfig; use risingwave_common::range::RangeBoundsExt; use thiserror_ext::AsReport; +use crate::object::object_metrics::ObjectStoreMetrics; use crate::object::{ prefix, ObjectDataStream, ObjectError, ObjectMetadata, ObjectMetadataIter, ObjectRangeBounds, - ObjectResult, ObjectStore, StreamingUploader, + ObjectResult, ObjectStore, OperationType, StreamingUploader, }; /// Opendal object storage. @@ -38,6 +40,7 @@ pub struct OpendalObjectStore { pub(crate) engine_type: EngineType, pub(crate) config: Arc, + pub(crate) metrics: Arc, } #[derive(Clone)] @@ -64,6 +67,7 @@ impl OpendalObjectStore { op, engine_type: EngineType::Memory, config: Arc::new(ObjectStoreConfig::default()), + metrics: Arc::new(ObjectStoreMetrics::unused()), }) } } @@ -99,10 +103,14 @@ impl ObjectStore for OpendalObjectStore { } async fn streaming_upload(&self, path: &str) -> ObjectResult { - Ok( - OpendalStreamingUploader::new(self.op.clone(), path.to_string(), self.config.clone()) - .await?, + Ok(OpendalStreamingUploader::new( + self.op.clone(), + path.to_string(), + self.config.clone(), + self.metrics.clone(), + self.store_media_type(), ) + .await?) } async fn read(&self, path: &str, range: impl ObjectRangeBounds) -> ObjectResult { @@ -127,7 +135,7 @@ impl ObjectStore for OpendalObjectStore { ))); } - Ok(Bytes::from(data)) + Ok(data.to_bytes()) } /// Returns a stream reading the object specified in `path`. If given, the stream starts at the @@ -142,9 +150,17 @@ impl ObjectStore for OpendalObjectStore { ObjectError::internal("opendal streaming read error") )); let range: Range = (range.start as u64)..(range.end as u64); + + // The layer specified first will be executed first. + // `TimeoutLayer` must be specified before `RetryLayer`. + // Otherwise, it will lead to bad state inside OpenDAL and panic. 
+ // See https://docs.rs/opendal/latest/opendal/layers/struct.RetryLayer.html#panics let reader = self .op .clone() + .layer(TimeoutLayer::new().with_io_timeout(Duration::from_millis( + self.config.retry.streaming_read_attempt_timeout_ms, + ))) .layer( RetryLayer::new() .with_min_delay(Duration::from_millis( self.config.retry.req_backoff_interval_ms, @@ -157,16 +173,13 @@ impl ObjectStore for OpendalObjectStore { .with_factor(self.config.retry.req_backoff_factor as f32) .with_jitter(), ) - .layer(TimeoutLayer::new().with_io_timeout(Duration::from_millis( - self.config.retry.streaming_read_attempt_timeout_ms, - ))) .reader_with(path) - .range(range) .await?; - let stream = reader.into_stream().map(|item| { - item.map_err(|e| { - ObjectError::internal(format!("reader into_stream fail {}", e.as_report())) - }) + let stream = reader.into_bytes_stream(range).await?.map(|item| { + item.map(|b| Bytes::copy_from_slice(b.as_ref())) + .map_err(|e| { + ObjectError::internal(format!("reader into_stream fail {}", e.as_report())) + }) }); Ok(Box::pin(stream)) @@ -254,19 +267,75 @@ impl ObjectStore for OpendalObjectStore { } } +struct OpendalStreamingUploaderExecute { + /// To record metrics for uploading parts. + metrics: Arc, + media_type: &'static str, +} + +impl OpendalStreamingUploaderExecute { + const STREAMING_UPLOAD_TYPE: OperationType = OperationType::StreamingUpload; + + fn new(metrics: Arc, media_type: &'static str) -> Self { + Self { + metrics, + media_type, + } + } +} + +impl Execute for OpendalStreamingUploaderExecute { + fn execute(&self, f: BoxedStaticFuture<()>) { + let operation_type_str = Self::STREAMING_UPLOAD_TYPE.as_str(); + let media_type = self.media_type; + + let metrics = self.metrics.clone(); + let _handle = tokio::spawn(async move { + let _timer = metrics + .operation_latency + .with_label_values(&[media_type, operation_type_str]) + .start_timer(); + + f.await + }); + } +} + /// Store multiple parts in a map, and concatenate them on finish. pub struct OpendalStreamingUploader { writer: Writer, + /// Buffer for data. It will store at least `UPLOAD_BUFFER_SIZE` bytes of data before wrapping itself + /// into a stream and uploading it to the object store as a part. + buf: Vec, + /// Length of the data that has not been uploaded to the object store. + not_uploaded_len: usize, + /// Whether the writer is valid. The writer is invalid after abort/close. + is_valid: bool, + + abort_on_err: bool, } impl OpendalStreamingUploader { + const UPLOAD_BUFFER_SIZE: usize = 16 * 1024 * 1024; + pub async fn new( op: Operator, path: String, config: Arc, + metrics: Arc, + media_type: &'static str, ) -> ObjectResult { + let monitored_execute = OpendalStreamingUploaderExecute::new(metrics, media_type); + + // The layer specified first will be executed first. + // `TimeoutLayer` must be specified before `RetryLayer`. + // Otherwise, it will lead to bad state inside OpenDAL and panic. 
+ // See https://docs.rs/opendal/latest/opendal/layers/struct.RetryLayer.html#panics let writer = op .clone() + .layer(TimeoutLayer::new().with_io_timeout(Duration::from_millis( + config.retry.streaming_upload_attempt_timeout_ms, + ))) .layer( RetryLayer::new() .with_min_delay(Duration::from_millis(config.retry.req_backoff_interval_ms)) @@ -275,34 +344,64 @@ impl OpendalStreamingUploader { .with_factor(config.retry.req_backoff_factor as f32) .with_jitter(), ) - .layer(TimeoutLayer::new().with_io_timeout(Duration::from_millis( - config.retry.streaming_upload_attempt_timeout_ms, - ))) .writer_with(&path) - .concurrent(8) - .buffer(OPENDAL_BUFFER_SIZE) + .concurrent(config.opendal_upload_concurrency) + .executor(Executor::with(monitored_execute)) .await?; - Ok(Self { writer }) + Ok(Self { + writer, + buf: vec![], + not_uploaded_len: 0, + is_valid: true, + abort_on_err: config.opendal_writer_abort_on_err, + }) } -} -const OPENDAL_BUFFER_SIZE: usize = 16 * 1024 * 1024; + async fn flush(&mut self) -> ObjectResult<()> { + let data: Vec = self.buf.drain(..).collect(); + debug_assert_eq!( + data.iter().map(|b| b.len()).sum::(), + self.not_uploaded_len + ); + if let Err(err) = self.writer.write(data).await { + self.is_valid = false; + if self.abort_on_err { + self.writer.abort().await?; + } + return Err(err.into()); + } + self.not_uploaded_len = 0; + Ok(()) + } +} impl StreamingUploader for OpendalStreamingUploader { async fn write_bytes(&mut self, data: Bytes) -> ObjectResult<()> { - self.writer.write(data).await?; - + assert!(self.is_valid); + self.not_uploaded_len += data.len(); + self.buf.push(data); + if self.not_uploaded_len >= Self::UPLOAD_BUFFER_SIZE { + self.flush().await?; + } Ok(()) } async fn finish(mut self) -> ObjectResult<()> { + assert!(self.is_valid); + if self.not_uploaded_len > 0 { + self.flush().await?; + } + + assert!(self.buf.is_empty()); + assert_eq!(self.not_uploaded_len, 0); + + self.is_valid = false; match self.writer.close().await { Ok(_) => (), Err(err) => { - // Due to a bug in OpenDAL, calling `abort()` here may trigger unreachable code and cause panic. - // refer to https://github.com/apache/opendal/issues/4651 - // This will be fixed after the next bump in the opendal version. - // self.writer.abort().await?; + if self.abort_on_err { + self.writer.abort().await?; + } return Err(err.into()); } }; @@ -311,12 +410,14 @@ impl StreamingUploader for OpendalStreamingUploader { } fn get_memory_usage(&self) -> u64 { - OPENDAL_BUFFER_SIZE as u64 + Self::UPLOAD_BUFFER_SIZE as u64 } } #[cfg(test)] mod tests { + use stream::TryStreamExt; + use super::*; async fn list_all(prefix: &str, store: &OpendalObjectStore) -> Vec { @@ -341,15 +442,23 @@ mod tests { let bytes = store.read("/abc", 4..6).await.unwrap(); assert_eq!(String::from_utf8(bytes.to_vec()).unwrap(), "56".to_string()); - // Overflow. - store.read("/abc", 4..44).await.unwrap_err(); - store.delete("/abc").await.unwrap(); // No such object. store.read("/abc", 0..3).await.unwrap_err(); } + #[tokio::test] + #[should_panic] + async fn test_memory_read_overflow() { + let block = Bytes::from("123456"); + let store = OpendalObjectStore::test_new_memory_engine().unwrap(); + store.upload("/abc", block).await.unwrap(); + + // Overflow. 
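The part-upload latency that used to be timed inside `MonitoredStreamingUploader` is now measured by the executor hook that `OpendalStreamingUploader::new` installs via `.executor(Executor::with(...))`: OpenDAL hands each concurrent part upload to `Execute::execute` as a `BoxedStaticFuture<()>`, so whatever wraps that future covers the whole part write. Below is a minimal sketch of such a hook, assuming only that `tokio` and `tracing` are available; it swaps the patch's prometheus histogram for a debug log, and the type name `TimingExecute` is hypothetical.

    use std::time::Instant;

    use opendal::raw::BoxedStaticFuture;
    use opendal::Execute;

    struct TimingExecute;

    impl Execute for TimingExecute {
        fn execute(&self, f: BoxedStaticFuture<()>) {
            // Each `f` is one buffered part (roughly `UPLOAD_BUFFER_SIZE`, i.e. 16 MiB) being
            // written out; spawn it and record how long the whole part takes.
            let _handle = tokio::spawn(async move {
                let started = Instant::now();
                f.await;
                tracing::debug!(elapsed = ?started.elapsed(), "streaming upload part finished");
            });
        }
    }

    // Installed the same way as the patch's `OpendalStreamingUploaderExecute`:
    // op.writer_with(&path).concurrent(n).executor(Executor::with(TimingExecute)).await?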
+ store.read("/abc", 4..44).await.unwrap_err(); + } + #[tokio::test] async fn test_memory_metadata() { let block = Bytes::from("123456"); diff --git a/src/object_store/src/object/opendal_engine/opendal_s3.rs b/src/object_store/src/object/opendal_engine/opendal_s3.rs index e86a209f4f3fa..5ba90ad93ccba 100644 --- a/src/object_store/src/object/opendal_engine/opendal_s3.rs +++ b/src/object_store/src/object/opendal_engine/opendal_s3.rs @@ -22,11 +22,16 @@ use opendal::Operator; use risingwave_common::config::ObjectStoreConfig; use super::{EngineType, OpendalObjectStore}; +use crate::object::object_metrics::ObjectStoreMetrics; use crate::object::ObjectResult; impl OpendalObjectStore { /// create opendal s3 engine. - pub fn new_s3_engine(bucket: String, config: Arc) -> ObjectResult { + pub fn new_s3_engine( + bucket: String, + config: Arc, + metrics: Arc, + ) -> ObjectResult { // Create s3 builder. let mut builder = S3::default(); builder.bucket(&bucket); @@ -50,11 +55,16 @@ impl OpendalObjectStore { op, engine_type: EngineType::S3, config, + metrics, }) } /// Creates a minio client. The server should be like `minio://key:secret@address:port/bucket`. - pub fn new_minio_engine(server: &str, config: Arc) -> ObjectResult { + pub fn new_minio_engine( + server: &str, + config: Arc, + metrics: Arc, + ) -> ObjectResult { let server = server.strip_prefix("minio://").unwrap(); let (access_key_id, rest) = server.split_once(':').unwrap(); let (secret_access_key, mut rest) = rest.split_once('@').unwrap(); @@ -89,6 +99,7 @@ impl OpendalObjectStore { op, engine_type: EngineType::Minio, config, + metrics, }) } @@ -111,6 +122,7 @@ impl OpendalObjectStore { pub fn new_s3_engine_with_credentials( bucket: &str, config: Arc, + metrics: Arc, aws_access_key_id: &str, aws_secret_access_key: &str, aws_region: &str, @@ -135,6 +147,7 @@ impl OpendalObjectStore { op, engine_type: EngineType::S3, config, + metrics, }) } } diff --git a/src/object_store/src/object/opendal_engine/oss.rs b/src/object_store/src/object/opendal_engine/oss.rs index 70fd6628f29b0..c4fc5d500b11e 100644 --- a/src/object_store/src/object/opendal_engine/oss.rs +++ b/src/object_store/src/object/opendal_engine/oss.rs @@ -20,6 +20,7 @@ use opendal::Operator; use risingwave_common::config::ObjectStoreConfig; use super::{EngineType, OpendalObjectStore}; +use crate::object::object_metrics::ObjectStoreMetrics; use crate::object::ObjectResult; impl OpendalObjectStore { @@ -28,6 +29,7 @@ impl OpendalObjectStore { bucket: String, root: String, config: Arc, + metrics: Arc, ) -> ObjectResult { // Create oss backend builder. let mut builder = Oss::default(); @@ -55,6 +57,7 @@ impl OpendalObjectStore { op, engine_type: EngineType::Oss, config, + metrics, }) } } diff --git a/src/object_store/src/object/opendal_engine/webhdfs.rs b/src/object_store/src/object/opendal_engine/webhdfs.rs index cb8a2ad1753b3..f083102a3ed21 100644 --- a/src/object_store/src/object/opendal_engine/webhdfs.rs +++ b/src/object_store/src/object/opendal_engine/webhdfs.rs @@ -20,6 +20,7 @@ use opendal::Operator; use risingwave_common::config::ObjectStoreConfig; use super::{EngineType, OpendalObjectStore}; +use crate::object::object_metrics::ObjectStoreMetrics; use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; @@ -29,6 +30,7 @@ impl OpendalObjectStore { endpoint: String, root: String, config: Arc, + metrics: Arc, ) -> ObjectResult { // Create webhdfs backend builder. 
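As shown above, `new_minio_engine` pulls the credentials out of the `minio://key:secret@address:port/bucket` string with plain `strip_prefix` and `split_once` calls. A self-contained sketch of that parsing follows; splitting the remainder into endpoint and bucket with `rsplit_once('/')` is an assumption made here for illustration (the patch does not show that part), and the helper name `parse_minio_uri` is hypothetical.

    /// Hypothetical helper: split `minio://key:secret@address:port/bucket` into its parts.
    fn parse_minio_uri(server: &str) -> Option<(&str, &str, &str, &str)> {
        let server = server.strip_prefix("minio://")?;
        let (access_key_id, rest) = server.split_once(':')?;
        let (secret_access_key, rest) = rest.split_once('@')?;
        // Assumption: everything before the last '/' is the endpoint, the rest is the bucket.
        let (endpoint, bucket) = rest.rsplit_once('/')?;
        Some((access_key_id, secret_access_key, endpoint, bucket))
    }

    // parse_minio_uri("minio://key:secret@127.0.0.1:9000/bucket")
    //     == Some(("key", "secret", "127.0.0.1:9000", "bucket"))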
let mut builder = Webhdfs::default(); @@ -47,6 +49,7 @@ impl OpendalObjectStore { op, engine_type: EngineType::Webhdfs, config, + metrics, }) } } diff --git a/src/prost/build.rs b/src/prost/build.rs index 4e939f46abb63..6d31201fa4733 100644 --- a/src/prost/build.rs +++ b/src/prost/build.rs @@ -112,7 +112,7 @@ fn main() -> Result<(), Box> { // The requirement is from Source node -> SourceCatalog -> WatermarkDesc -> expr .type_attribute("catalog.WatermarkDesc", "#[derive(Eq, Hash)]") .type_attribute("catalog.StreamSourceInfo", "#[derive(Eq, Hash)]") - .type_attribute("catalog.SecretRef", "#[derive(Eq, Hash)]") + .type_attribute("secret.SecretRef", "#[derive(Eq, Hash)]") .type_attribute("catalog.IndexColumnProperties", "#[derive(Eq, Hash)]") .type_attribute("expr.ExprNode", "#[derive(Eq, Hash)]") .type_attribute("data.DataType", "#[derive(Eq, Hash)]") diff --git a/src/risedevtool/src/risedev_env.rs b/src/risedevtool/src/risedev_env.rs index 943f37abf655d..bff4062f72097 100644 --- a/src/risedevtool/src/risedev_env.rs +++ b/src/risedevtool/src/risedev_env.rs @@ -78,13 +78,9 @@ pub fn generate_risedev_env(services: &Vec) -> String { writeln!(env, r#"RPK_BROKERS="{brokers}""#).unwrap(); } ServiceConfig::SchemaRegistry(c) => { - let address = &c.address; - let port = &c.port; - writeln!( - env, - r#"RISEDEV_SCHEMA_REGISTRY_URL="http://{address}:{port}""#, - ) - .unwrap(); + let url = format!("http://{}:{}", c.address, c.port); + writeln!(env, r#"RISEDEV_SCHEMA_REGISTRY_URL="{url}""#,).unwrap(); + writeln!(env, r#"RPK_REGISTRY_HOSTS="{url}""#).unwrap(); } ServiceConfig::MySql(c) if c.application != Application::Metastore => { let host = &c.address; diff --git a/src/stream/src/executor/sink.rs b/src/stream/src/executor/sink.rs index f7c7770c0bd7d..c0ee1330805bf 100644 --- a/src/stream/src/executor/sink.rs +++ b/src/stream/src/executor/sink.rs @@ -185,6 +185,9 @@ impl SinkExecutor { && !self.sink_param.downstream_pk.is_empty(); // Don't compact chunk for blackhole sink for better benchmark performance. let compact_chunk = !self.sink.is_blackhole(); + tracing::info!("Sink info: sink_id: {} actor_id: {}, need_advance_delete: {}, re_construct_with_sink_pk: {}", + sink_id, actor_id, need_advance_delete, re_construct_with_sink_pk); + let processed_input = Self::process_msg( input, self.sink_param.sink_type,
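The `risedev_env.rs` hunk above formats the schema registry URL once and writes it to both `RISEDEV_SCHEMA_REGISTRY_URL` and the new `RPK_REGISTRY_HOSTS`, presumably so `rpk` picks the registry up from its environment the same way it already picks up `RPK_BROKERS`. A minimal sketch of the resulting output, using a placeholder address and port that are not taken from the patch and a hypothetical helper name:

    use std::fmt::Write;

    /// Illustrative only: what the generated risedev-env lines look like for a
    /// schema registry entry after this change.
    fn schema_registry_env(address: &str, port: u16) -> String {
        let mut env = String::new();
        let url = format!("http://{}:{}", address, port);
        writeln!(env, r#"RISEDEV_SCHEMA_REGISTRY_URL="{url}""#).unwrap();
        writeln!(env, r#"RPK_REGISTRY_HOSTS="{url}""#).unwrap();
        env
    }

    // schema_registry_env("127.0.0.1", 8081) produces:
    // RISEDEV_SCHEMA_REGISTRY_URL="http://127.0.0.1:8081"
    // RPK_REGISTRY_HOSTS="http://127.0.0.1:8081"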