diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml new file mode 100644 index 00000000..0fcdfdf5 --- /dev/null +++ b/.github/workflows/spellcheck.yml @@ -0,0 +1,56 @@ +name: spellcheck +on: + pull_request: + +jobs: + run: + name: Spell Check with Typos + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: typos-action + id: typos-output + uses: crate-ci/typos@v1.27.3 + with: + config: ./typos.toml + + - name: Find Comment + if: ${{ failure() }} + uses: peter-evans/find-comment@v3 + id: find-comment + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: "github-actions[bot]" + body-includes: The CI check for spelling has failed + + - name: Create comment on PR if typos fail + if: ${{ failure() && steps.find-comment.outputs.comment-id == '' }} + uses: peter-evans/create-or-update-comment@v4 + with: + issue-number: ${{ github.event.pull_request.number }} + body: | + ### CI Check Failed + The CI check for spelling has failed. Please review the errors and correct any spelling mistakes. + + For more errors and details, you can check the [CI Log](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) or you can install [typos](https://github.com/crate-ci/typos?tab=readme-ov-file#install) and run `typos` locally to check for and fix spelling issues. + + - name: Update comment if typos fail + if: ${{ failure() && steps.find-comment.outputs.comment-id != '' }} + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ steps.find-comment.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: | + ### CI Check Failed + The CI check for spelling has failed. Please review the errors and correct any spelling mistakes. + + For more errors and details, you can check the [CI Log](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) or you can install [typos](https://github.com/crate-ci/typos?tab=readme-ov-file#install) and run `typos` locally to check for and fix spelling issues. 
+ edit-mode: replace + + - name: Check Spelling + uses: rojopolis/spellcheck-github-actions@0.35.0 + with: + config_path: .spellcheck.yml + task_name: Markdown diff --git a/.spellcheck.yml b/.spellcheck.yml new file mode 100644 index 00000000..35738429 --- /dev/null +++ b/.spellcheck.yml @@ -0,0 +1,23 @@ +matrix: + - name: Markdown + aspell: + ignore-case: true + lang: en + dictionary: + wordlists: + - .wordlist.txt + output: wordlist.dic + encoding: utf-8 + pipeline: + - pyspelling.filters.markdown: + markdown_extensions: + - pymdownx.superfences + - pyspelling.filters.html: + comments: false + ignores: + - code + - pre + sources: + - "*.mdx" + - "*.md" + default_encoding: utf-8 diff --git a/.wordlist.txt b/.wordlist.txt new file mode 100644 index 00000000..bbe69bf2 --- /dev/null +++ b/.wordlist.txt @@ -0,0 +1,230 @@ +https +github +risingwavelabs +RisingWave +Redpanda +Kinesis +Astra +Debezium +debezium +JSON +struct +varchar +TabItem +RailroadDiagram +rr +SSL +SASL +OAUTHBEARER +Docusaurus +docusaurus +Postgres +postgres +datagen +Grafana +Etcd +MinIO +CMake +OpenSSL +psql +RiseDev +Tmux +Kubernetes +frontend +NodePort +kubectl +uptime +Avro +Protobuf +Prebuilt +Observability +CSV +DML +Alluxio +Superset +DBeaver +Jupyter +Metabase +Clickhouse +CockroachDB +DataStax +Pinot +TiDB +Hudi +Trino +Airbyte +Fivetran +Hightouch +dbt +ELT +ETL +DataStax +StreamNative +integrations +macOS +quickstart +substring +substrings +gz +dev +CTEs +namespace +deserialization +scalability +changelog +failover +risingwave +sql +js +rw +pgwire +json +mv +mysql +Redash +JDBC +Redash +analytics +Flink +JVM +APIs +stateful +runtime +disaggregated +PrivateLink +VPCs +VPC +DataSet +FlinkSQL +LSM +natively +ad-hoc +hoc +RocksDB +checkpointing +checkpointed +UDF +APIs +DAGs +acyclic +MapReduce +dataflow +pipelined +RisingWave's +Redash +TiCDC +upsert +JSONB +boolean +Citus +CLI +Chandy +OpenDAL +WebHDFS +ChatGPT +clickstream +cryptocurrency +dataset +HDFS +flink +Flink's +Homebrew +IoT +Lamport +microservice +microservices +multibyte +protobuf +Protobuf +timestamptz +timestamptzs +unary +zstd +http +pre +toc +latencies +thoroughputs +VPC +bigint +bytea +TopN +UDFs +avro +kafka +Paimon +TPC +Greenplum +updateable +ClickHouse +JetStream +MSK +msk +NATS +ScyllaDB +OOM +DataGrip +PgAdmin +clickhouse +Supabase +BigQuery +transactional +OLAP +ksqlDB +backfilling +GraphQL +src +img +jpg +StarRocks +starrocks +md +Bytebase +GCS +gcs +faq +OLTP +Napa +superset +Napa +www +DDL +backfill +backfills +MVs +Nats +Psycopg +Datadog +Hasura +Liquibase +EMQX +HiveMQ +MQTT +RabbitMQ +Standalone's +localhost +prometheus +datasources +OpenSearch +codebase +Databricks +SDKs +sdk +RWUs +roadmap +terraform +Serverless +WASM +schemas +risingwavecloud +Rockset +personalization +DefaultButton +LightButton +VoteNotify +SharedMergeTree +JWT +TOML +mintlify +customizations +repo \ No newline at end of file diff --git a/README.md b/README.md index bd4f57f5..26b95786 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,31 @@ # Note -This repository is now published to: https://risingwavelabs.mintlify.app/docs/current/intro/introduction, and will be switched to our documentation domain once all testing and customizations are completed. +This repository contains the latest RisingWave documentation. [The old repository](https://github.com/risingwavelabs/risingwave-docs-legacy) now hosts the archived documentation up to v2.0 of RisingWave. + +# Documentation structure + +Below are the main topic groups.
Some groups are elevated to be tabs shown on the top of a documentation page. + +- get-started +- demos +- sql +- ingestion +- processing +- delivery +- deploy +- operate +- python-sdk +- client-libraries +- performance +- troubleshoot +- integrations +- faq +- reference +- cloud +- changelog - -# Mintlify Starter Kit - -Click on `Use this template` to copy the Mintlify starter kit. The starter kit contains examples including - -- Guide pages -- Navigation -- Customizations -- API Reference pages -- Use of popular components - ### Development Install the [Mintlify CLI](https://www.npmjs.com/package/mintlify) to preview the documentation changes locally. To install, use the following command diff --git a/changelog/product-lifecycle.mdx b/changelog/product-lifecycle.mdx index d8d3d618..68168e13 100644 --- a/changelog/product-lifecycle.mdx +++ b/changelog/product-lifecycle.mdx @@ -20,38 +20,40 @@ As introduced above, when you see a "Public preview" note in the documentation, Below is a list of all features in the public preview phase: -| Feature name | Start date | Start version | -| --- | --- | --- | -| [Partitioned Postgres CDC table](/docs/current/ingest-from-postgres-cdc/) | 2024.9 | 2.0 | -| [Map type](/docs/current/data-type-map/) | 2024.8 | 2.0 | -| [Azure Blob sink](/docs/current/sink-to-azure-blob/) | 2024.8 | 2.0 | -| [Approx percentile](/docs/current/sql-function-aggregate/#approx_percentile) | 2024.8 | 2.0 | -| [Auto schema change in MySQL CDC](/docs/current/ingest-from-mysql-cdc/#automatically-change-schema) | 2024.8 | 2.0 | -| [SQL Server CDC source](/docs/current/ingest-from-sqlserver-cdc/) | 2024.8 | 2.0 | -| [Sink data in parquet format](/docs/current/data-delivery/#sink-data-in-parquet-format) | 2024.8 | 2.0 | -| [Time travel queries](/docs/current/time-travel-queries/) | 2024.7 | 2.0 | -| [Manage secrets](/docs/current/manage-secrets/) | 2024.7 | 2.0 | -| [Amazon DynamoDB sink](../integrations/destinations/amazon-dynamodb) | 2024.6 | 1.10 | -| Auto-map upstream table schema in [MySQL CDC](/docs/current/ingest-from-mysql-cdc/#automatically-map-upstream-table-schema) and [PostgreSQL CDC](/docs/current/ingest-from-postgres-cdc/#automatically-map-upstream-table-schema) | 2024.6 | 1.10 | -| [Version column](/docs/current/sql-create-table/) | 2024.6 | 1.9 | -| [Snowflake sink](/docs/current/sink-to-snowflake/) | 2024.5 | 1.9 | -| [Subscription](/docs/current/subscription/) | 2024.5 | 1.9 | -| [RisingWave as PostgreSQL FDW](/docs/current/risingwave-as-postgres-fdw/) | 2024.4 | 1.9 | -| [Iceberg source](/docs/current/ingest-from-iceberg/) | 2024.3 | 1.8 | -| [Google BigQuery sink](/docs/current/sink-to-bigquery/) | 2023.11 | 1.4 | -| [SET BACKGROUND\_DDL command](/docs/current/sql-set-background-ddl/) | 2023.10 | 1.3 | -| [Decouple sinks](/docs/current/data-delivery/#sink-decoupling) | 2023.10 | 1.3 | -| [Pulsar sink](/docs/current/sink-to-pulsar/) | 2023.10 | 1.3 | -| [Cassandra sink](/docs/current/sink-to-cassandra/) | 2023.9 | 1.2 | -| [Elasticsearch sink](/docs/current/sink-to-elasticsearch/) | 2023.9 | 1.2 | -| [NATS sink](/docs/current/sink-to-nats/) | 2023.9 | 1.2 | -| [NATS source](/docs/current/ingest-from-nats/) | 2023.9 | 1.2 | -| [Append-only tables](/docs/current/sql-create-table/) | 2023.8 | 1.1 | -| [Emit on window close](/docs/current/emit-on-window-close/) | 2023.8 | 1.1 | -| [Read-only transactions](/docs/current/sql-start-transaction/) | 2023.8 | 1.1 | -| [AWS Kinesis sink](/docs/current/sink-to-aws-kinesis/) | 2023.7 | 1.0 | -| [CDC Citus 
source](/docs/current/ingest-from-citus-cdc/) | 2023.5 | 0.19 | -| [Iceberg sink](/docs/current/sink-to-iceberg/) | 2023.4 | 0.18 | -| [Pulsar source](/docs/current/ingest-from-pulsar/) | 2022.12 | 0.1 | +| Feature name | Start version | +| :-- | :-- | +| [Shared source](/sql/commands/sql-create-source/#shared-source) | 2.1 | +| [ASOF join](/docs/current/query-syntax-join-clause/#asof-joins) | 2.1 | +| [Partitioned Postgres CDC table](/docs/current/ingest-from-postgres-cdc/) | 2.1 | +| [Map type](/docs/current/data-type-map/) | 2.0 | +| [Azure Blob sink](/docs/current/sink-to-azure-blob/) | 2.0 | +| [Approx percentile](/docs/current/sql-function-aggregate/#approx_percentile) | 2.0 | +| [Auto schema change in MySQL CDC](/docs/current/ingest-from-mysql-cdc/#automatically-change-schema) | 2.0 | +| [SQL Server CDC source](/docs/current/ingest-from-sqlserver-cdc/) | 2.0 | +| [Sink data in parquet format](/docs/current/data-delivery/#sink-data-in-parquet-format) | 2.0 | +| [Time travel queries](/docs/current/time-travel-queries/) | 2.0 | +| [Manage secrets](/docs/current/manage-secrets/) | 2.0 | +| [Amazon DynamoDB sink](../integrations/destinations/amazon-dynamodb) | 1.10 | +| Auto-map upstream table schema in [MySQL](/docs/current/ingest-from-mysql-cdc/#automatically-map-upstream-table-schema) and [PostgreSQL](/docs/current/ingest-from-postgres-cdc/#automatically-map-upstream-table-schema) | 1.10 | +| [Version column](/docs/current/sql-create-table/) | 1.9 | +| [Snowflake sink](/docs/current/sink-to-snowflake/) | 1.9 | +| [Subscription](/docs/current/subscription/) | 1.9 | +| [RisingWave as PostgreSQL FDW](/docs/current/risingwave-as-postgres-fdw/) | 1.9 | +| [Iceberg source](/docs/current/ingest-from-iceberg/) | 1.8 | +| [Google BigQuery sink](/docs/current/sink-to-bigquery/) | 1.4 | +| [SET BACKGROUND\_DDL command](/docs/current/sql-set-background-ddl/) | 1.3 | +| [Decouple sinks](/docs/current/data-delivery/#sink-decoupling) | 1.3 | +| [Pulsar sink](/docs/current/sink-to-pulsar/) | 1.3 | +| [Cassandra sink](/docs/current/sink-to-cassandra/) | 1.2 | +| [Elasticsearch sink](/docs/current/sink-to-elasticsearch/) | 1.2 | +| [NATS sink](/docs/current/sink-to-nats/) | 1.2 | +| [NATS source](/docs/current/ingest-from-nats/) | 1.2 | +| [Append-only tables](/docs/current/sql-create-table/) | 1.1 | +| [Emit on window close](/docs/current/emit-on-window-close/) | 1.1 | +| [Read-only transactions](/docs/current/sql-start-transaction/) | 1.1 | +| [AWS Kinesis sink](/docs/current/sink-to-aws-kinesis/) | 1.0 | +| [CDC Citus source](/docs/current/ingest-from-citus-cdc/) | 0.19 | +| [Iceberg sink](/docs/current/sink-to-iceberg/) | 0.18 | +| [Pulsar source](/docs/current/ingest-from-pulsar/) | 0.1 | This table will be updated regularly to reflect the latest status of features as they progress through the release stages. diff --git a/changelog/release-notes.mdx b/changelog/release-notes.mdx index eba3cc7c..a3806a61 100644 --- a/changelog/release-notes.mdx +++ b/changelog/release-notes.mdx @@ -3,13 +3,10 @@ title: Release notes description: This page summarizes changes in each version of RisingWave, including new features and important bug fixes. --- -## v2.0.0 + -This version was released on September 18, 2024. +## SQL features -### Main changes - -#### **SQL features** * Query syntax: * **Public preview:** Supports `AS CHANGELOG` to convert any stream into an append-only changelog. 
[#17132](https://github.com/risingwavelabs/risingwave/pull/17132) @@ -35,7 +32,7 @@ This version was released on September 18, 2024. * Adds `append_only` column in RisingWave catalogs `rw_tables` and `rw_materialized_views`. [#17598](https://github.com/risingwavelabs/risingwave/pull/17598) * Adds RisingWave catalog `rw_catalog.rw_secrets`. [#17726](https://github.com/risingwavelabs/risingwave/pull/17726) -#### **Connectors** +## Connectors * **Public preview:** Supports ingesting Avro map type for source connectors. [#17980](https://github.com/risingwavelabs/risingwave/pull/17980) * **Public preview:** Supports encoding `parquet` for file source. [#17201](https://github.com/risingwavelabs/risingwave/pull/17201) @@ -70,23 +67,23 @@ This version was released on September 18, 2024. * Ensures at-least-once delivery semantic and eventual consistency for Kinesis sink. [#17983](https://github.com/risingwavelabs/risingwave/pull/17983) * Supports backfilling by consuming a fixed snapshot of upstream table and then the upstream data epoch by epoch. [#17735](https://github.com/risingwavelabs/risingwave/pull/17735) -#### **Installation and deployment** +## Installation and deployment * Supports configuring the SQL metastore using username, password, and database separately. [#17530](https://github.com/risingwavelabs/risingwave/pull/17530) * Supports more seamless scaling-in in Kubernetes deployments. [#17802](https://github.com/risingwavelabs/risingwave/pull/17802) -#### **Cluster configuration changes** +## Cluster configuration changes * **Breaking change:** Refactors `streaming_rate_limit` into `source_rate_limit` and `backfill_rate_limit`. [#17796](https://github.com/risingwavelabs/risingwave/pull/17796) * **Breaking change:** Adds a default soft and hard limit on actor count per worker parallelism. When the hard limit is reached, streaming workloads will fail. [#18383](https://github.com/risingwavelabs/risingwave/pull/18383) * Introduces `batch.developer.exchange_connection_pool_size` and `streaming.developer.exchange_connection_pool_size` to configure streaming and batch remote exchange between two nodes. [#17768](https://github.com/risingwavelabs/risingwave/pull/17768) * Introduces system parameter `license_key` used to enable enterprise features. [#17396](https://github.com/risingwavelabs/risingwave/pull/17396) -#### Fixes +## Fixes * Deletes related cursors when deleting a subscription. [#17232](https://github.com/risingwavelabs/risingwave/pull/17232) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v2.0.0-standalone single_node` @@ -97,13 +94,12 @@ This version was released on September 18, 2024. See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/v1.10.1...v2.0.0). -## v1.10.0 + -This version was released on July 30, 2024. + -### Main changes -#### SQL features +## SQL features * Query syntax: * SQL commands: @@ -120,7 +116,7 @@ This version was released on July 30, 2024. * System catalog: * Supports `rw_catalog.actor_id_to_ddl` and `rw_catalog.fragment_id_to_ddl`. [#17229](https://github.com/risingwavelabs/risingwave/pull/17229). -#### Connectors +## Connectors * Avro schemas with `"default": "NaN"` and positive and negative infinities, are supported as `float` and `double` types. [#17309](https://github.com/risingwavelabs/risingwave/pull/17309). * Supports ingesting simple `AVRO MAP` types as `JSONB`. [#16948](https://github.com/risingwavelabs/risingwave/pull/16948). 
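A minimal, hedged sketch of the "simple Avro map ingested as JSONB" item above. The source name, topic, broker address, schema-registry URL, and field names are placeholders, and the exact `CREATE SOURCE` options should be taken from the Kafka/Avro connector pages rather than from this sketch.

```sql
-- Hypothetical Kafka topic whose Avro schema contains a map<string, string> field
-- named "attributes"; with simple Avro map ingestion it arrives as a JSONB column.
CREATE SOURCE user_events
WITH (
    connector = 'kafka',
    topic = 'user_events',
    properties.bootstrap.server = 'localhost:9092'
) FORMAT PLAIN ENCODE AVRO (
    schema.registry = 'http://localhost:8081'
);

-- Ordinary JSONB operators then apply to the ingested map values.
SELECT user_id, attributes ->> 'country' AS country
FROM user_events;
```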
@@ -143,20 +139,20 @@ This version was released on July 30, 2024. * Supports checkpoint decouple for Delta Lake sinks. [#16777](https://github.com/risingwavelabs/risingwave/pull/16777). * Supports sinking serial types. [#16969](https://github.com/risingwavelabs/risingwave/pull/16969). -#### Cluster configuration changes +## Cluster configuration changes * Sets arrangement backfill as the default. [#14846](https://github.com/risingwavelabs/risingwave/pull/14846). * Supports spill hash join to avoid OOM issues. [#17122](https://github.com/risingwavelabs/risingwave/pull/17122). * Supports spill hash aggregation for batch queries. [#16771](https://github.com/risingwavelabs/risingwave/pull/16771). * Changes the algorithm that calculates the reserve memory size. [#16992](https://github.com/risingwavelabs/risingwave/pull/16992). -#### Bug fixes +## Bug fixes * Improves error message and location of the cursor. [#16959](https://github.com/risingwavelabs/risingwave/pull/16959). * Improves error message when trying to create a CDC source with columns. [#16636](https://github.com/risingwavelabs/risingwave/pull/16636). * Allows `GRANT` and `REVOKE` privileges on views. [#16699](https://github.com/risingwavelabs/risingwave/pull/16699). -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.10.0 single_node` @@ -167,15 +163,14 @@ This version was released on July 30, 2024. See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/release-1.9...release-1.10). -## v1.9.1 + -This version was released on June 6, 2024. + v1.9.0 was skipped due to some critical bugs. -### Main changes -#### SQL features +## SQL features * Query syntax: * Supports non-append-only temporal joins, where the outer side is not required to be append-only. [#16286](https://github.com/risingwavelabs/risingwave/pull/16286). @@ -191,7 +186,7 @@ v1.9.0 was skipped due to some critical bugs. * Supports system table `rw_iceberg_files` for displaying the files of an Iceberg source or table. [#16180](https://github.com/risingwavelabs/risingwave/pull/16180). * Supports system table `rw_iceberg_snapshot` for listing all snapshots. [#16175](https://github.com/risingwavelabs/risingwave/pull/16175). -#### Connectors +## Connectors * Provides stable support for SQLAlchemy 2.0. [#29](https://github.com/risingwavelabs/sqlalchemy-risingwave/pull/29). * Deprecates `s3` connector. [#16337](https://github.com/risingwavelabs/risingwave/pull/16337). @@ -211,18 +206,18 @@ v1.9.0 was skipped due to some critical bugs. * Supports Snowflake sink connector. [#15429](https://github.com/risingwavelabs/risingwave/pull/15429). * Supports creating `upsert` type BigQuery sinks. [#15780](https://github.com/risingwavelabs/risingwave/pull/15780). -#### Installation and deployment +## Installation and deployment * Sets PostgreSQL as the default meta store when deploying with Docker Compose. [#16724](https://github.com/risingwavelabs/risingwave/pull/16724). -#### Cluster configuration changes +## Cluster configuration changes * Supports using `ALTER SYSTEM` to set a system-wide default value for a session parameter. [#16062](https://github.com/risingwavelabs/risingwave/pull/16062). * Modifies the meaning of `streaming_rate_limit=0`, which now means pausing the snapshot read stream for backfill, and pausing source read for sources. This statement previously disabled the rate limit within the session. 
[#16333](https://github.com/risingwavelabs/risingwave/pull/16333). * Supports configuring the reserved memory bytes of the compute node by using `RW_RESERVED_MEMORY_BYTES` runtime parameter and `reserved-memory-bytes` startup option. [#16433](https://github.com/risingwavelabs/risingwave/pull/16433). * Introduce new timeout and retry configurations for ObjectStore and deprecate ambiguous timeout configurations. [#16231](https://github.com/risingwavelabs/risingwave/pull/16231). -#### Fixes +## Fixes * Properly convert `-inf`, `+inf`, and `nan` types to `null` for JDBC sinks. [#16230](https://github.com/risingwavelabs/risingwave/pull/16230). * Handles sinking `-inf`, `+inf`, and `nan` types for ClickHouse, Doris, and StarRocks sink connectors. [#15664](https://github.com/risingwavelabs/risingwave/pull/15664). @@ -231,7 +226,7 @@ v1.9.0 was skipped due to some critical bugs. * Fixes sources with `encode avro` on decimal ingesting. [#16202](https://github.com/risingwavelabs/risingwave/pull/16202). * Fixes sources with `encode avro` on bytes/fixed/decimal default value. [#16414](https://github.com/risingwavelabs/risingwave/pull/16414). -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.9.1-standalone single_node` @@ -242,13 +237,11 @@ v1.9.0 was skipped due to some critical bugs. See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/release-1.8...release-1.9). -## v1.8.0 - -This version was released on April 3, 2024. + -### Main changes + -#### SQL features +## SQL features * Query syntax: * Supports `RANGE` frames in window function calls. [#14416](https://github.com/risingwavelabs/risingwave/pull/14416). @@ -260,7 +253,7 @@ This version was released on April 3, 2024. * Adds a description column to the system parameters table. [#15113](https://github.com/risingwavelabs/risingwave/pull/15113). * Supports authenticating with OAuth token acquired from the Cloud when creating a user. [#13151](https://github.com/risingwavelabs/risingwave/pull/13151). * SQL functions & operators: - * Supports ruby-pg. [#14859](https://github.com/risingwavelabs/risingwave/pull/14859). See [Use RisingWave in your Ruby application](/docs/1.8/ruby-client-libraries/). + * Supports ruby-pg. [#14859](https://github.com/risingwavelabs/risingwave/pull/14859). * Supports `VARIADIC` arguments for the functions `format`, `concat_ws`, `jsonb_build_array`, `jsonb_build_object`, `jsonb_extract_path`, `jsonb_extract_path_text`. [#14753](https://github.com/risingwavelabs/risingwave/pull/14753). * Supports `concat` function. [#14753](https://github.com/risingwavelabs/risingwave/pull/14753). * System catalog: @@ -270,7 +263,7 @@ This version was released on April 3, 2024. * Supports system table `rw_depend`. [#15385](https://github.com/risingwavelabs/risingwave/pull/15385). * Supports `pg_settings` catalog. [#15108](https://github.com/risingwavelabs/risingwave/pull/15108). -#### Connectors +## Connectors * **Breaking change:** Sinks created from v1.6 and earlier that have `decouple` enabled may cause compatibility issues. Check if you have any sinks with this configuration by using the internal table `rw_sink_decouple` before upgrading to v1.8. [#15613](https://github.com/risingwavelabs/risingwave/pull/15613). * Avro tables and sources now require a schema registry during creation. [#15256](https://github.com/risingwavelabs/risingwave/pull/15256). @@ -286,17 +279,17 @@ This version was released on April 3, 2024. 
* Supports JDBC catalog for Iceberg sources. [#15551](https://github.com/risingwavelabs/risingwave/pull/15551). * Adds JDBC and Hive catalogs for Iceberg sink. [#14885](https://github.com/risingwavelabs/risingwave/pull/14885). -#### Installation and deployment +## Installation and deployment * Supports tab-completion for `SET` and `ALTER SYSTEM SET` commands in `psql` client. [#15123](https://github.com/risingwavelabs/risingwave/pull/15123). -* Supports SQL meta store. [#16019](https://github.com/risingwavelabs/risingwave/pull/16019). See [Start RisingWave using Docker Compose](/docs/1.8/risingwave-docker-compose/#customize-meta-store). +* Supports SQL meta store. [#16019](https://github.com/risingwavelabs/risingwave/pull/16019). -#### Bug fixes +## Bug fixes * Fixes an issue where built-in CDC connectors do not accept empty passwords. [#15411](https://github.com/risingwavelabs/risingwave/pull/15411). * Fixes an issue where materialized views created on a shared CDC source were allowed. [#15635](https://github.com/risingwavelabs/risingwave/pull/15635). -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.8.0-standalone single_node` @@ -307,13 +300,11 @@ This version was released on April 3, 2024. See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/release-1.7...release-1.8). -## v1.7.0 - -This version was released on February 29, 2024. + -### Main changes + -#### SQL features +## SQL features * Query syntax: * Supports using `*` when creating a table or source with schema from an external connector to read all columns. [#14644](https://github.com/risingwavelabs/risingwave/pull/14644). @@ -333,25 +324,25 @@ This version was released on February 29, 2024. * Adds `relpersistence` in `pg_class` catalog. [#14400](https://github.com/risingwavelabs/risingwave/pull/14400). * Supports `pg_get_viewdef()`. [#14336](https://github.com/risingwavelabs/risingwave/pull/14336). -#### Connectors +## Connectors * Cassandra and ScyllaDB sinks no longer support `timestamp` type. [#14413](https://github.com/risingwavelabs/risingwave/pull/14413). * Updates StarRocks sink connector parameters. [#14823](https://github.com/risingwavelabs/risingwave/pull/14823). * Introduces `snapshot` option to allow users to disable CDC backfill and to only consume from the latest changelog. [#14718](https://github.com/risingwavelabs/risingwave/pull/14718). * Sets the default value of `transactional` parameter to `true` for MySQL and Postgres CDC shared sources. [#14899](https://github.com/risingwavelabs/risingwave/pull/14899). -#### Installation and deployment +## Installation and deployment * \[Pre-Release\] Supports the standalone mode to run RisingWave in a single process. [#14951](https://github.com/risingwavelabs/risingwave/pull/14951). * Supports Alibaba Cloud OSS as the storage backend. -#### Cluster configuration changes +## Cluster configuration changes * Introduce a session variable `batch_enable_distributed_dml` to enable batch ingesting. [#14630](https://github.com/risingwavelabs/risingwave/pull/14630). * Changes wording from `AUTO` to `ADAPTIVE` parallelism. [#14414](https://github.com/risingwavelabs/risingwave/pull/14414). * Supports adaptive scaling for streaming jobs by default. [#14873](https://github.com/risingwavelabs/risingwave/pull/14873). 
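A quick, hedged illustration of the cluster configuration items above. The session variable name is taken verbatim from the bullet; the object name is a placeholder, and the `ALTER ... SET PARALLELISM` form is a sketch to be checked against the SQL reference before use.

```sql
-- Session-level switch introduced here: enable distributed execution for batch DML ingestion.
SET batch_enable_distributed_dml = true;

-- Adaptive parallelism (renamed from AUTO to ADAPTIVE) is the default for streaming jobs;
-- a fixed parallelism can still be pinned per job. "my_mv" is a placeholder name.
ALTER MATERIALIZED VIEW my_mv SET PARALLELISM = 4;
```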
-### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.7.0-standalone single_node` @@ -362,13 +353,11 @@ This version was released on February 29, 2024. See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/release-1.6...release-1.7). -## v1.6.0 + -This version was released on January 11, 2024. + -### Main changes - -#### SQL features +## SQL features * Query syntax: * Allows `NOW` in upper bound condition for temporal filters. [#13985](https://github.com/risingwavelabs/risingwave/pull/13985). @@ -386,7 +375,7 @@ This version was released on January 11, 2024. * System catalog: * Add system view `rw_streaming_parallelism`. [#14261](https://github.com/risingwavelabs/risingwave/pull/14261). -#### Connectors +## Connectors * Adds CDC backfill support for Postgres so users can ingest multiple PostgreSQL tables with a single replication slot. [#13958](https://github.com/risingwavelabs/risingwave/pull/13958). * Support multi-table transactions from upstream MySQL & Postgres CDC. Specify `transactional = true` in the `WITH` options to enable it. [#14375](https://github.com/risingwavelabs/risingwave/pull/14375). @@ -399,17 +388,17 @@ This version was released on January 11, 2024. * `connector = 'iceberg_java'` is deprecated, and users can only Iceberg sinks with the Rust version of Iceberg. Similarly, the DeltaLake sink will also use the Rust version implementation. [#14277](https://github.com/risingwavelabs/risingwave/pull/14277). * Supports StarRocks sink. [#12681](https://github.com/risingwavelabs/risingwave/pull/12681). -#### Installation and deployment +## Installation and deployment * Allows for `storage.prefetch_buffer_capacity_mb` to be configured in the TOML file to prevent out-of-memory issues. [#13558](https://github.com/risingwavelabs/risingwave/pull/13558). * Supports Huawei Cloud OBS as the storage backend. [#13844](https://github.com/risingwavelabs/risingwave/pull/13844). -#### Cluster configuration changes +## Cluster configuration changes * Supports setting `statement_timeout` value for queries. [#13933](https://github.com/risingwavelabs/risingwave/pull/13933). * Exposes SSL functionality through `RW_SSL_CERT` and `RW_SSL_KEY` environment variables to configure SSL certificates and key file location. [#14062](https://github.com/risingwavelabs/risingwave/pull/14062). -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.6.0 playground` @@ -420,12 +409,11 @@ This version was released on January 11, 2024. See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/release-1.5...release-1.6). -## v1.5.0 + -This version was released on December 11, 2023. + -### Main changes -#### SQL features +## SQL features * SQL Commands: * Supports `SET SCHEMA` syntax for `ALTER {TABLE t | [MATERIALIZED] VIEW (m)v | SOURCE src | SINK sink | CONNECTION c | FUNCTION f( argument_type [, ...] )}`. [#13341](https://github.com/risingwavelabs/risingwave/pull/13341). @@ -447,21 +435,21 @@ This version was released on December 11, 2023. * Adds system table `rw_internal_tables`. [#13272](https://github.com/risingwavelabs/risingwave/pull/13272). * Supports sink columns in `rw_columns` and `information_schema.columns`. [#13626](https://github.com/risingwavelabs/risingwave/pull/13626). -### Sources & sink +## Sources & sink * The load generator can generate `timestamptz` columns. 
[#13451](https://github.com/risingwavelabs/risingwave/pull/13451). * Adds option `[properties.fetch.queue.backoff.ms](http://properties.fetch.queue.backoff.ms)` for Kafka source. [#13321](https://github.com/risingwavelabs/risingwave/pull/13321). * Supports creating multiple CDC tables that share the same source, which allows for incremental and lock-free snapshot loading. [#12535](https://github.com/risingwavelabs/risingwave/pull/12535). * `CREATE SINK` statements no longer need to wait for backfill to complete. [#13665](https://github.com/risingwavelabs/risingwave/pull/13665) -### Deployment +## Deployment * Adds a docker-compose file for standalone mode. [#13233](https://github.com/risingwavelabs/risingwave/pull/13233). ### Cluster configuration changes * Adds support for system parameter `pause_on_next_bootstrap`. [#11936](https://github.com/risingwavelabs/risingwave/pull/11936) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.5.0 playground` * [Prebuilt all-in-one library for Linux](https://github.com/risingwavelabs/risingwave/releases/download/v1.5.0/risingwave-v1.5.0-x86_64-unknown-linux-all-in-one.tar.gz) @@ -471,13 +459,12 @@ This version was released on December 11, 2023. See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/v1.4-rc...v1.5.0-rc). -## v1.4.0 + -This version was released on November 10, 2023. + -### Main changes -#### SQL features +## SQL features * Query syntax: * Supports using subqueries in `UPDATE` and `DELETE` statements. [#12995](https://github.com/risingwavelabs/risingwave/pull/12995) @@ -488,7 +475,7 @@ This version was released on November 10, 2023. * Supports exposing hidden columns when using `DESCRIBE` command. [#12839](https://github.com/risingwavelabs/risingwave/pull/12839) * SQL functions & operators: * Supports `substring` and `substr` functions for `bytea` data type. [#13088](https://github.com/risingwavelabs/risingwave/pull/13088). - * Supports functions `jsonb_pretty`,  `jsonb_object`, `jsonb_strip_nulls`, and `jsonb_extract_path`. [#13050](https://github.com/risingwavelabs/risingwave/pull/13050), [#13036](https://github.com/risingwavelabs/risingwave/pull/13036), [#13169](https://github.com/risingwavelabs/risingwave/pull/13169), [#13143](https://github.com/risingwavelabs/risingwave/pull/13143). See [JSON functions](/docs/1.4/sql-function-json/#json-functions). + * Supports functions `jsonb_pretty`,  `jsonb_object`, `jsonb_strip_nulls`, and `jsonb_extract_path`. [#13050](https://github.com/risingwavelabs/risingwave/pull/13050), [#13036](https://github.com/risingwavelabs/risingwave/pull/13036), [#13169](https://github.com/risingwavelabs/risingwave/pull/13169), [#13143](https://github.com/risingwavelabs/risingwave/pull/13143). * Supports jsonb `@>`, `<@`, `?`, `?|, ?&`, `#>`,  `#>>`, `-` and `#-` operators. [#13056](https://github.com/risingwavelabs/risingwave/pull/13056), [#13110](https://github.com/risingwavelabs/risingwave/pull/13110), [#13118](https://github.com/risingwavelabs/risingwave/pull/13118). * Supports `greatest` and `least` functions. [#12838](https://github.com/risingwavelabs/risingwave/pull/12838). * Supports `regexp_split_to_array` function. [#12844](https://github.com/risingwavelabs/risingwave/pull/12844). @@ -499,7 +486,7 @@ This version was released on November 10, 2023. * Adds column `parallelism` in system table `rw_fragments`. 
[#12901](https://github.com/risingwavelabs/risingwave/pull/12901) * Adds columns `is_hidden`, `is_primary_key`, and `is_distribution_key` in `rw_columns` system table. [#12839](https://github.com/risingwavelabs/risingwave/pull/12839) -#### Sources & sinks +## Sources & sinks * Adds `google.protobuf.Any` support for Protobuf sources. [#12291](https://github.com/risingwavelabs/risingwave/pull/12291). * Adds `schemas.enable` support for Kafka sinks with upsert JSON. [#12113](https://github.com/risingwavelabs/risingwave/pull/12113). @@ -512,11 +499,11 @@ This version was released on November 10, 2023. * Adds support for Google BigQuery sink.[#12873](https://github.com/risingwavelabs/risingwave/pull/12873). * Adds support for Redis sink. [#11999](https://github.com/risingwavelabs/risingwave/pull/11999),[#13003](https://github.com/risingwavelabs/risingwave/pull/13003). -#### Deployment +## Deployment * Release RisingWave all-in-one binary with connector libraries. [#13133](https://github.com/risingwavelabs/risingwave/pull/13133) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.4.0 playground` @@ -527,13 +514,11 @@ This version was released on November 10, 2023. See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/v1.3-rc...v1.4-rc). -## v1.3.0 - -This version was released on October 18, 2023. + -### Main changes + -#### SQL features +## SQL features * SQL commands * Beta: Variable `BACKGROUND_DDL` can be set to `true` when creating a materialized view. [#12355](https://github.com/risingwavelabs/risingwave/pull/12355) @@ -558,7 +543,7 @@ This version was released on October 18, 2023. * `rw_hummock_branched_objects` , `rw_hummock_current_version` , `rw_hummock_checkpoint_version` , `rw_hummock_version_deltas` [#12309](https://github.com/risingwavelabs/risingwave/pull/12309) * `rw_hummock_meta_configs`, `rw_hummock_compaction_group_configs` [#12337](https://github.com/risingwavelabs/risingwave/pull/12337) -#### Sources & sinks +## Sources & sinks * Generated columns defined with non-deterministic functions cannot be part of the primary key. [#12181](https://github.com/risingwavelabs/risingwave/pull/12181) * Adds new `properties.enable.auto.commit` parameter for the Kafka consumer, which sets the `enable.auto.commit` parameter for the Kafka client. [#12223](https://github.com/risingwavelabs/risingwave/pull/12223) @@ -580,7 +565,7 @@ This version was released on October 18, 2023. #### Administration & observability * Supports querying from `information_schema.views`, which contains formations about views defined in the database. [#12045](https://github.com/risingwavelabs/risingwave/pull/12045) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.3.0 playground` @@ -591,13 +576,11 @@ This version was released on October 18, 2023. * [risingwave-connector-v1.3.0.tar.gz](https://github.com/risingwavelabs/risingwave/releases/download/v1.3.0/risingwave-connector-v1.3.0.tar.gz) See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/v1.2-rc...v1.3-rc). + -## v1.2.0 -This version was released on September 11, 2023. + -### Main changes - -#### SQL features +## SQL features * SQL commands: @@ -653,19 +636,20 @@ This version was released on September 11, 2023. * Adds support for transactions for single-table CDC data. 
[#11453](https://github.com/risingwavelabs/risingwave/pull/11453) -#### Sources & sinks +## Sources & sinks * Adds a new parameter `schema.registry.name.strategy` to the Kafka connector, with which you can specify naming strategies for schema registries. [#11384](https://github.com/risingwavelabs/risingwave/pull/11384) -* Breaking Change: Implements a Rust-native Iceberg sink connector to improve stability and performance. The connector introduces new parameters. Applications that rely on the previous version of the feature (specifically, the version included in RisingWave v1.0 and v1.1) may no longer function correctly. To restore functionality to your applications, please carefully review the syntax and parameters outlined on this page and make any necessary revisions to your code. Please refer to [Sink data to Iceberg](/docs/1.6/sink-to-iceberg/) for details. [#11326](https://github.com/risingwavelabs/risingwave/pull/11326) -* Adds support for sinking data to ClickHouse. For a detailed guide about how to sink data from RisingWave to ClickHouse, see [Sink data to ClickHouse](/docs/1.6/sink-to-clickhouse/). [#11240](https://github.com/risingwavelabs/risingwave/pull/11240) +* Breaking Change: Implements a Rust-native Iceberg sink connector to improve stability and performance. The connector introduces new parameters. Applications that rely on the previous version of the feature (specifically, the version included in RisingWave v1.0 and v1.1) may no longer function correctly. +To restore functionality to your applications, please carefully review the syntax and parameters outlined on this page and make any necessary revisions to your code. [#11326](https://github.com/risingwavelabs/risingwave/pull/11326) +* Adds support for sinking data to ClickHouse. [#11240](https://github.com/risingwavelabs/risingwave/pull/11240) * Beta: An enhancement has been made to the mysql-cdc connector to improve data ingestion performance. It achieves so by optimizing the data backfilling logic for CDC tables. This feature is not enabled by default. To enable it, run this command: `SET cdc_backfill="true";` [#11707](https://github.com/risingwavelabs/risingwave/pull/11707) * Adds a parameter `client.id` for Kafka sources. [#11911](https://github.com/risingwavelabs/risingwave/pull/11911) -#### Deployment +## Deployment * Supports HDFS as the storage backend for deployments via Docker Compose. [#11632](https://github.com/risingwavelabs/risingwave/pull/11632) -#### Administration & observability +## Administration & observability * Adds a new system parameter `max_concurrent_creating_streaming_jobs`, with which users can specify the maximum number of streaming jobs that can be created concurrently. [#11601](https://github.com/risingwavelabs/risingwave/pull/11601) * Improves the calculation logic of the _Mem Table Size (Max)_ metric in the RisingWave Dashboard. [#11442](https://github.com/risingwavelabs/risingwave/pull/11442) @@ -674,7 +658,7 @@ This version was released on September 11, 2023. * _Materialized View Read Size_, _Materialized View Write Size_ [#11054](https://github.com/risingwavelabs/risingwave/pull/11054) * _Active Sessions_ [#11688](https://github.com/risingwavelabs/risingwave/pull/11688) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.2.0 playground` @@ -686,13 +670,11 @@ This version was released on September 11, 2023. 
See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/v1.1.0...v1.2.0). -## v1.1.0 + -This version was released on August 8, 2023. + -### Main changes - -#### SQL features +## SQL features * SQL commands: @@ -769,7 +751,7 @@ This version was released on August 8, 2023. * Supports querying `created_at` and `initialized_at` from RisingWave relations such as sources, sinks, and tables in RisingWave catalogs. [#11199](https://github.com/risingwavelabs/risingwave/pull/11199) -#### Connectors +## Connectors * Supports specifying more Kafka parameters when creating a source or sink. [#11203](https://github.com/risingwavelabs/risingwave/pull/11203) @@ -778,7 +760,7 @@ This version was released on August 8, 2023. * `access_key` and its corresponding `secret_key` are now mandatory for all AWS authentication components. [#11120](https://github.com/risingwavelabs/risingwave/pull/11120) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.1.0 playground` @@ -786,13 +768,11 @@ This version was released on August 8, 2023. * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v1.1.0.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v1.1.0.tar.gz) -## v1.0.0 - -This version was released on July 12, 2023. + -### Main changes + -#### SQL features +## SQL features * SQL command: @@ -841,7 +821,7 @@ This version was released on July 12, 2023. * Supports tagging the created VPC endpoints when creating a PrivateLink connection. [#10582](https://github.com/risingwavelabs/risingwave/pull/10582) -#### Connectors +## Connectors * _**Breaking change**_: When creating a source or table with a connector whose schema is auto-resolved from an external format file, the syntax for defining primary keys within column definitions is replaced with the table constraint syntax. [#10195](https://github.com/risingwavelabs/risingwave/pull/10195) @@ -903,7 +883,7 @@ This version was released on July 12, 2023. * Supports using the user-provided publication to create a PostgreSQL CDC table. [#10804](https://github.com/risingwavelabs/risingwave/pull/10804) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v1.0.0 playground` @@ -911,22 +891,20 @@ This version was released on July 12, 2023. * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v1.0.0.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v1.0.0.tar.gz) -## v0.19.0 + -This version was released on June 1, 2023. + -### Main changes - -#### Installation +## Installation * Now, you can easily install RisingWave on your local machine with Homebrew by running `brew install risingwave`. See [Run RisingWave](/docs/current/get-started/#install-and-start-risingwave). -#### Administration +## Administration * Adds the `pg_indexes` and `dattablespace` system catalogs. [#9844](https://github.com/risingwavelabs/risingwave/pull/9844), [#9822](https://github.com/risingwavelabs/risingwave/pull/9822) * Now, the `SHOW PARAMETERS` command will display the mutability of each system parameter. [#9526](https://github.com/risingwavelabs/risingwave/pull/9526) -#### SQL features +## SQL features * Experimental features: Adds support for 256-bit integers. 
[#9146](https://github.com/risingwavelabs/risingwave/pull/9146), [#9184](https://github.com/risingwavelabs/risingwave/pull/9184), [#9186](https://github.com/risingwavelabs/risingwave/pull/9186), [#9191](https://github.com/risingwavelabs/risingwave/pull/9191), [#9217](https://github.com/risingwavelabs/risingwave/pull/9217) * Indexes can be created on expressions. [#9142](https://github.com/risingwavelabs/risingwave/pull/9142) @@ -968,7 +946,7 @@ This version was released on June 1, 2023. * Adds support for the `starts_with()` string function and `^@`. [#9967](https://github.com/risingwavelabs/risingwave/pull/9967) * Adds support for unary `trunc`, `ln`, `log10` (`log`), `exp`, `cbrt` (`||/`) mathematical functions. [#9991](https://github.com/risingwavelabs/risingwave/pull/9991) -#### Connectors +## Connectors * Adds support for ingesting CDC data from TiDB and sinking data to TiDB with the JDBC connector. [#8708](https://github.com/risingwavelabs/risingwave/pull/8708) * Adds support for ingesting CDC data from Citus. [#8988](https://github.com/risingwavelabs/risingwave/pull/8988) @@ -981,12 +959,12 @@ This version was released on June 1, 2023. * Adds support for a new row format `DEBEZIUM_MONGO_JSON` in the Kafka source connector. [#9250](https://github.com/risingwavelabs/risingwave/pull/9250) * Adds CSV format support for the Kafka source connector. [#9875](https://github.com/risingwavelabs/risingwave/pull/9875) -#### Cluster configuration changes +## Cluster configuration changes * `--data_directory`and `--state_store`must be specified on CLI of the meta node, or the cluster will fail to start. [#9170](https://github.com/risingwavelabs/risingwave/pull/9170) * Clusters will refuse to start if the specified object store URL identified by `state_store` and `data_directory` is occupied by another instance. Do not share the object store URL between multiple clusters. [#9642](https://github.com/risingwavelabs/risingwave/pull/9642) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v0.19.0 playground` @@ -994,20 +972,18 @@ This version was released on June 1, 2023. * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.19.0.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.19.0.tar.gz) -## v0.18.0 + -This version was released on March 31, 2023. + Starting from this version, we’ll respect semantic versioning conventions by using the middle number (`y` , instead of `z,` in `x.y.z`) to indicate minor versions. That is why this is `v0.18.0`, not `v0.1.18`. -### Main changes - -#### Administration and troubleshooting +## Administration and troubleshooting * Improves error messages by including the location of the statement in question. [#8646](https://github.com/risingwavelabs/risingwave/pull/8646) * Initial values of immutable system parameters can be specified via the meta-node command line. The initial values provided in the configuration file will be ignored. [#8366](https://github.com/risingwavelabs/risingwave/pull/8366) -#### SQL features +## SQL features * Adds initial support for user-defined functions. [#8597](https://github.com/risingwavelabs/risingwave/pull/8597) [#8644](https://github.com/risingwavelabs/risingwave/pull/8644) [#8255](https://github.com/risingwavelabs/risingwave/pull/8255) [#7943](https://github.com/risingwavelabs/risingwave/pull/7943) * Adds support for JSONB data type. 
[#8256](https://github.com/risingwavelabs/risingwave/pull/8256) [#8181](https://github.com/risingwavelabs/risingwave/pull/8181) @@ -1040,7 +1016,7 @@ Starting from this version, we’ll respect semantic versioning conventions by u * Improves the behaviors of the `exp` operator when the operand is too large or small. [#8309](https://github.com/risingwavelabs/risingwave/pull/8309) * Supports process time temporal join, which enables the joining of an append-only stream (such as Kafka) with a temporal table (e.g. a materialized view backed by MySQL CDC). This feature ensures that any updates made to the temporal table will not affect previous results obtained from the temporal join. Supports `FOR SYSTEM_TIME AS OF NOW()` syntax to express process time temporal join. [#8480](https://github.com/risingwavelabs/risingwave/pull/8480) -#### Connectors +## Connectors * Adds a new field `basetime` to the load generator connector for generating timestamp data. The load generator will take this field as `now` and generates data accordingly. [#8619](https://github.com/risingwavelabs/risingwave/pull/8619) * Empty cells in CSV are now parsed as null. [#8709](https://github.com/risingwavelabs/risingwave/pull/8709) @@ -1051,7 +1027,7 @@ Starting from this version, we’ll respect semantic versioning conventions by u See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/compare/v0.1.17...v0.18.0). -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v0.18.0 playground` @@ -1059,17 +1035,16 @@ See the **Full Changelog** [here](https://github.com/risingwavelabs/risingwave/c * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.18.0.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.18.0.tar.gz) -## v0.1.17 -This version was released on February 28, 2023 + -### Main changes + -#### Administration +## Administration * Adds a system catalog view `rw_catalog.rw_ddl_progress`, with which users can view the progress of a `CREATE INDEX`, `CREATE SINK`, or `CREATE MATERIALIZED VIEW` statement. [#7914](https://github.com/risingwavelabs/risingwave/pull/7914) * Adds the `pg_conversion` and `pg_enum` system catalogs. [#7964](https://github.com/risingwavelabs/risingwave/pull/7964), [#7706](https://github.com/risingwavelabs/risingwave/pull/7706) -#### SQL features +## SQL features * Adds the `exp()` function. [#7971](https://github.com/risingwavelabs/risingwave/pull/7971) * Adds the `pow()` function. [#7789](https://github.com/risingwavelabs/risingwave/pull/7789) @@ -1077,7 +1052,7 @@ This version was released on February 28, 2023 * Adds support for descending order in `CREATE INDEX` statements. [#7822](https://github.com/risingwavelabs/risingwave/pull/7822) * Adds `SHOW PARAMETERS` and `ALTER SYSTEM` commands to display and update system parameters. [#7882](https://github.com/risingwavelabs/risingwave/pull/7882), [#7913](https://github.com/risingwavelabs/risingwave/pull/7913) -#### connectors +## Connectors * Adds a new parameter `match_pattern` to the S3 connector. With the new parameter, users can specify the pattern to filter files that they want to ingest from S3 buckets. For documentation updates, see [Ingest data from S3 buckets](/docs/current/ingest-from-s3/). [#7565](https://github.com/risingwavelabs/risingwave/pull/7565) * Adds the PostgreSQL CDC connector. 
Users can use this connector to ingest data and CDC events from PostgreSQL directly. For documentation updates, see [Ingest data from PostgreSQL CDC](/docs/current/ingest-from-postgres-cdc/). [#6869](https://github.com/risingwavelabs/risingwave/pull/6869), [#7133](https://github.com/risingwavelabs/risingwave/pull/7133) @@ -1088,7 +1063,7 @@ This version was released on February 28, 2023 * `use_transaction` : Specifies whether to enable Kafka transactions or not. [#7500](https://github.com/risingwavelabs/risingwave/pull/7500) * SSL/SASL parameters: Specifies SSL encryption and SASL authentication settings. [#7540](https://github.com/risingwavelabs/risingwave/pull/7540) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v0.1.17 playground` @@ -1096,17 +1071,15 @@ This version was released on February 28, 2023 * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.17.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.17.tar.gz) -## v0.1.16 + -This version was released on February 1, 2023. + -### Main changes - -#### Administration +## Administration * Adds support for aborting a query in local mode with `Ctrl + C`. [#7444](https://github.com/risingwavelabs/risingwave/pull/7444) -#### SQL features +## SQL features * Adds support for the `to_timestamp` function. [#7060](https://github.com/risingwavelabs/risingwave/pull/7060) * Adds support for the `RETURNING` clause in DML statements. [#7094](https://github.com/risingwavelabs/risingwave/pull/7094) @@ -1120,12 +1093,12 @@ This version was released on February 1, 2023. * Adds support for `SET VISIBILITY_MODE` You can use this session variable to configure whether only checkpoint data is readable for batch query. [#5850](https://github.com/risingwavelabs/risingwave/pull/5850) * Adds support for `SET STREAMING_PARALLELISM` . You can use this session variable to configure parallelism for streaming queries. [#7370](https://github.com/risingwavelabs/risingwave/pull/7370) -#### Connectors +## Connectors * Adds support for generating array and struct data using the datagen connector. [#7099](https://github.com/risingwavelabs/risingwave/pull/7099) * Adds the S3 source connector, with which users can ingest data in CSV format from S3 locations. For data ingestion from files, CSV is the only supported format and the files must be placed on S3. [#6846](https://github.com/risingwavelabs/risingwave/pull/6846) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v0.1.16 playground` @@ -1133,19 +1106,17 @@ This version was released on February 1, 2023. * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.16.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.16.tar.gz) -## v0.1.15 - -This version was released on January 4, 2023. + -### Main changes + -#### Installation and deployment +## Installation and deployment * Parallelism and available memory of compute nodes are now command-line arguments and removed from the configuration file. [#6767](https://github.com/risingwavelabs/risingwave/pull/6767) * The default barrier interval is set to 1 second. [#6553](https://github.com/risingwavelabs/risingwave/pull/6553) * Adds support for meta store backup and recovery. 
[#6737](https://github.com/risingwavelabs/risingwave/pull/6737) -#### SQL features +## SQL features * Adds support for `SHOW CREATE MATERIALIZED VIEW` and `SHOW CREATE VIEW` to show how materialized and non-materialized views are defined. [#6921](https://github.com/risingwavelabs/risingwave/pull/6921) * Adds support for `CREATE TABLE IF NOT EXISTS`. [#6643](https://github.com/risingwavelabs/risingwave/pull/6643) @@ -1156,30 +1127,28 @@ This version was released on January 4, 2023. * Ads the initial support for batch query on Kafka source. [#6474](https://github.com/risingwavelabs/risingwave/pull/6474) * Adds support for `SET QUERY_EPOCH` to query historical data based on meta backup. [#6840](https://github.com/risingwavelabs/risingwave/pull/6840) -#### Connectors +## Connectors * Improves the handling of schema errors for Avro and Protobuf data. [#6821](https://github.com/risingwavelabs/risingwave/pull/6821) * Adds two options to the datagen connector to make it possible to generate increasing timestamp values. [#6591](https://github.com/risingwavelabs/risingwave/pull/6591) -#### Observability +## Observability * Adds metrics for the backup manager in Grafana. [#6898](https://github.com/risingwavelabs/risingwave/pull/6898) * RisingWave Dashboard can now fetch data from Prometheus and visualize it in charts. [#6602](https://github.com/risingwavelabs/risingwave/pull/6602) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v0.1.15 playground` * [Prebuilt library for Linux](https://github.com/risingwavelabs/risingwave/releases/download/v0.1.15/risingwave-v0.1.15-x86_64-unknown-linux.tar.gz) * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.15.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.15.tar.gz) -## v0.1.14 + -This version was released on December 1, 2022. + -### Main changes - -#### SQL features +## SQL features * `PRIMARY KEY` constraint checks can be performed on materialized sources and tables but not on non-materialized sources. For tables or materialized sources that enabled `PRIMARY KEY` constraints, if you insert data to an existing key, the new data will overwrite the old data. [#6320](https://github.com/risingwavelabs/risingwave/pull/6320) [#6435](https://github.com/risingwavelabs/risingwave/pull/6435) * Adds support for timestamp with time zone data type. You can use this data type in time window functions, and convert between it and timestamp (without time zone). [#5855](https://github.com/risingwavelabs/risingwave/pull/5855) [#5910](https://github.com/risingwavelabs/risingwave/pull/5910) [#5968](https://github.com/risingwavelabs/risingwave/pull/5968) @@ -1190,14 +1159,14 @@ This version was released on December 1, 2022. * Adds the system catalog schema. [#6227](https://github.com/risingwavelabs/risingwave/pull/6227) * Displays error messages when users enter conflicting or redundant command options. [#5933](https://github.com/risingwavelabs/risingwave/pull/5933/) -#### Connectors +## Connectors * Adds support for the Maxwell Change Data Capture (CDC) format. [#6057](https://github.com/risingwavelabs/risingwave/pull/6057) * Protobuf schema files can be loaded from Web locations in `s3://`, `http://`, or `https://` formats. 
[#6114](https://github.com/risingwavelabs/risingwave/pull/6114) [#5964](https://github.com/risingwavelabs/risingwave/pull/5964) * Adds support for Confluent Schema Registry for Kafka data in Avro and Protobuf formats. [#6289](https://github.com/risingwavelabs/risingwave/pull/6289) * Adds two options to the Kinesis connector. Users can specify the startup mode and optionally the sequence number to start with. [#6317](https://github.com/risingwavelabs/risingwave/pull/6317) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v0.1.14 playground` @@ -1205,13 +1174,11 @@ This version was released on December 1, 2022. * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.14.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.14.tar.gz) -## v0.1.13 - -This version was released on October 17, 2022. + -### Main changes + -#### SQL features +## SQL features * SQL commands: @@ -1256,11 +1223,11 @@ This version was released on October 17, 2022. * `\di`: Lists all indexes in the current database. * `pg_catalog.pg_index`: Contains information about indexes. -#### Connectors +## Connectors * Nested columns are now supported for the datagen connector. [#5550](https://github.com/risingwavelabs/risingwave/pull/5550) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 risingwavelabs/risingwave:v0.1.13 playground` @@ -1268,13 +1235,11 @@ This version was released on October 17, 2022. * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.13.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.13.tar.gz) -## v0.1.12 + -This version was released on September 7, 2022. + -### Main changes - -#### SQL features +## SQL features * SQL commands: * `EXPLAIN` now supports specifying options. Supported options: `trace`, `verbose`, and `type`. Unlike PostgreSQL, each option should be separated by a comma and wrapped by parentheses as a whole. [#4730](https://github.com/risingwavelabs/risingwave/pull/4730) @@ -1285,7 +1250,7 @@ This version was released on September 7, 2022. * Adds support for new system information functions: `current_schema`, `current_schema()`, and `session_user`. [#4358](https://github.com/risingwavelabs/risingwave/pull/4358) * The `pg_namespace` catalog now has a new namespace column `nspacl` for storing access privileges. [#4326](https://github.com/risingwavelabs/risingwave/pull/4326) -#### Connectors +## Connectors * Some connector parameters were renamed. The old parameter names are still functional but may be deprecated in the future. [#4503](https://github.com/risingwavelabs/risingwave/pull/4503) @@ -1316,24 +1281,22 @@ This version was released on September 7, 2022. * The row format name, `debezium json`, for CDC stream sources, has been renamed to `debezium_json`. [#4494](https://github.com/risingwavelabs/risingwave/pull/4494) -#### Configuration changes +## Configuration changes * The default batch query execution mode was changed from distributed to local. [#4789](https://github.com/risingwavelabs/risingwave/pull/4789) -### Assets +## Assets * Run this version from Docker: `docker run -it --pull=always -p 4566:4566 -p 5691:5691 ghcr.io/risingwavelabs/risingwave:v0.1.12 playground` * Prebuilt library for Linux is not available in this release. 
* [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.12.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.12.tar.gz) -## v0.1.11 - -This version was released on July 29, 2022. + -### Main changes + -#### SQL features +## SQL features * New SQL functions: * `overlay()`: Replaces a substring. [#3671](https://github.com/risingwavelabs/risingwave/pull/3671) @@ -1349,14 +1312,14 @@ This version was released on July 29, 2022. * An `ORDER BY` clause in the `CREATE MATERIALIZED VIEW` statement is allowed but not considered as part of the definition of the materialized view. It is only used in the initial creation of the materialized view. It is not used during refreshes. This is a behavior change due to the introduction of parallel table scans. [#3670](https://github.com/risingwavelabs/risingwave/pull/3670) * Support for filter clauses on aggregate functions. [#4114](https://github.com/risingwavelabs/risingwave/pull/4114) -#### Connectors +## Connectors * RisingWave can now sink data to Kafka topics in append-only mode and Debezium mode. [#3923](https://github.com/risingwavelabs/risingwave/pull/3923) [#3682](https://github.com/risingwavelabs/risingwave/pull/3682) [#3674](https://github.com/risingwavelabs/risingwave/pull/3674) * Syntax change for `CREATE SOURCE`: A parameter name is no longer wrapped by single quotation marks. [#3997](https://github.com/risingwavelabs/risingwave/pull/3997). See the example: * Old: `CREATE SOURCE s1 WITH ( 'connector' = 'kafka', 'kafka.topic' = 'kafka_1_partition_topic', 'kafka.brokers' = '127.0.0.1:29092' ) ROW FORMAT json;` * New: `CREATE SOURCE s WITH ( connector = 'kafka', kafka.topic = 'kafka_1_partition_topic', kafka.brokers = '127.0.0.1:29092' ) ROW FORMAT json;` -### Assets +## Assets * Run this version from Docker: `run -it --pull=always -p 4566:4566 -p 5691:5691 ghcr.io/risingwavelabs/risingwave:v0.1.11 playground` @@ -1364,15 +1327,14 @@ This version was released on July 29, 2022. * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.11.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.11.tar.gz) -## v0.1.10 + -This version was released on July 5, 2022. + -### Main changes -#### SQL features +## SQL features -##### SQL operators and functions +### SQL operators and functions * Support string concatenation operator `||`. [#3147](https://github.com/risingwavelabs/risingwave/pull/3147) * Support interval comparison. [#3222](https://github.com/risingwavelabs/risingwave/pull/3222) @@ -1391,30 +1353,31 @@ This version was released on July 5, 2022. * `unnest()` for expanding nested tables to rows [#3017](https://github.com/risingwavelabs/risingwave/pull/3017) * Support `count()`, `min()`, and `max()` functions on these data types: _interval_, _timestamp_, _varchar_, and _date_. [#3069](https://github.com/risingwavelabs/risingwave/pull/3069) -##### SQL commands +### SQL commands * Support `EXPLAIN CREATE INDEX`. [#3229](https://github.com/risingwavelabs/risingwave/pull/3229) * Add cascade and restrict options in `REVOKE` commands. [#3363](https://github.com/risingwavelabs/risingwave/pull/3363) * Expand the `CREATE TABLE` syntax to support creating append-only tables. [#3058](https://github.com/risingwavelabs/risingwave/pull/3058) * Support the `CREATE USER` command and user authentication. 
[#3074](https://github.com/risingwavelabs/risingwave/pull/3074) -##### Data types +### Data types * Support implicit casts from single-quoted literals. [#3487](https://github.com/risingwavelabs/risingwave/pull/3487) * Add string as an alias for data type varchar. [#3094](https://github.com/risingwavelabs/risingwave/pull/3094) * Support string intervals. [#3037](https://github.com/risingwavelabs/risingwave/pull/3037) -##### Database management +### Database management * Add the default super user “postgres”. [#3127](https://github.com/risingwavelabs/risingwave/pull/3127) * The default schema name is changed to “public” from “dev”. [#3166](https://github.com/risingwavelabs/risingwave/pull/3166) -#### Connectors +## Connectors * Add random seed for the Datagen Source Connector. [#3124](https://github.com/risingwavelabs/risingwave/pull/3124) -### Assets +## Assets * [Prebuilt library for Linux](https://github.com/risingwavelabs/risingwave/releases/download/v0.1.10/risingwave-v0.1.10-x86_64-unknown-linux.tar.gz) * [Source code (zip)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.10.zip) * [Source code (tar.gz)](https://github.com/risingwavelabs/risingwave/archive/refs/tags/v0.1.10.tar.gz) + \ No newline at end of file diff --git a/sdks/go.mdx b/client-libraries/go.mdx similarity index 97% rename from sdks/go.mdx rename to client-libraries/go.mdx index 2cfb5168..23b09c7a 100644 --- a/sdks/go.mdx +++ b/client-libraries/go.mdx @@ -9,7 +9,7 @@ In this guide, we use the [`pgx` driver](https://github.com/jackc/pgx) to connec ## Run RisingWave -To learn about how to run RisingWave, see [Run RisingWave](/get-started.md#run-risingwave). +To learn about how to run RisingWave, see [Run RisingWave](../get-started/quickstart.mdx). ## Install the `pgx` driver diff --git a/sdks/java.mdx b/client-libraries/java.mdx similarity index 97% rename from sdks/java.mdx rename to client-libraries/java.mdx index ddb8397b..0d10b413 100644 --- a/sdks/java.mdx +++ b/client-libraries/java.mdx @@ -9,7 +9,7 @@ In this guide, we use the [PostgreSQL JDBC](https://jdbc.postgresql.org/) driver ## Run RisingWave -To learn about how to run RisingWave, see [Run RisingWave](/get-started.md#run-risingwave). +To learn about how to run RisingWave, see [Run RisingWave](../get-started/quickstart.mdx). > You do not need to connect to RisingWave at this stage. ## Download the PostgreSQL JDBC driver diff --git a/sdks/nodejs.mdx b/client-libraries/nodejs.mdx similarity index 97% rename from sdks/nodejs.mdx rename to client-libraries/nodejs.mdx index 750db45e..25840828 100644 --- a/sdks/nodejs.mdx +++ b/client-libraries/nodejs.mdx @@ -9,7 +9,7 @@ In this guide, we use the [Node.js pg driver](https://www.npmjs.com/package/pg) ## Run RisingWave -To learn about how to run RisingWave, see [Run RisingWave](/get-started.md#run-risingwave). +To learn about how to run RisingWave, see [Run RisingWave](../get-started/quickstart.mdx). ## Install npm diff --git a/sdks/overview.mdx b/client-libraries/overview.mdx similarity index 64% rename from sdks/overview.mdx rename to client-libraries/overview.mdx index 582e5780..1ca9c8ef 100644 --- a/sdks/overview.mdx +++ b/client-libraries/overview.mdx @@ -1,24 +1,28 @@ --- title: "Overview" -description: "As RisingWave is wire-compatible with PostgreSQL, you have the flexibility to utilize third-party PostgreSQL drivers to seamlessly interact with RisingWave from your applications." 
+description: "You can use third-party PostgreSQL drivers to seamlessly interact with RisingWave from your applications." mode: wide --- -Here is an overview of the available options. We provide detailed example guides about how to interact with RisingWave for some of the drivers. For those without a guide, feel free to explore and utilize them based on your preferences and requirements. + +To interact with RisingWave from Python applications, we recommend using the [`risingwave-py`](https://pypi.org/project/risingwave-py/) SDK. +This SDK provides a simple way to perform ad-hoc queries, subscribe to changes, and define event handlers for tables and materialized views, making it easier to integrate real-time data into applications. For details, see [Python SDK](../python-sdk/intro). + +Here is an overview of the available options. We provide detailed example guides about how to interact with RisingWave for some of the drivers. For those without a guide, feel free to explore and utilize them based on your preferences and requirements. This table will be continuously updated to ensure compatibility. | Language | Driver | Latest tested version | -| ---------- | ------------------------------------------------------------------------------------------------------------ | --------------------- | +| :--------- | :----------------------------------------------------------------------------------------------------------- | :-------------------- | | C | [libpq](https://www.postgresql.org/docs/current/libpq.html) | | | C# (.NET) | [Npgsql](https://www.npgsql.org/) | 8.0.2 | -| Go | [pgx](https://pkg.go.dev/github.com/jackc/pgx/v5). See the [example guide](go.mdx). | v5.4.3 | +| Go | [pgx](https://pkg.go.dev/github.com/jackc/pgx/v5). See the [example guide](/client-libraries/go). | v5.4.3 | | Go | [pq](https://github.com/lib/pq) | | -| Java | [JDBC](https://jdbc.postgresql.org/). See the [example guide](java.md). | 42.5.4 | -| JavaScript | [pg](https://www.npmjs.com/package/pg). See the [example guide](nodejs.md). | 8.11.3 | -| Python | [psycopg2](https://pypi.org/project/psycopg2/). See the [example guide](../python/python.md). | | +| Java | [JDBC](https://jdbc.postgresql.org/). See the [example guide](/client-libraries/java). | 42.5.4 | +| JavaScript | [pg](https://www.npmjs.com/package/pg). See the [example guide](/client-libraries/nodejs). | 8.11.3 | +| Python | [psycopg2](https://pypi.org/project/psycopg2/). See the [example guide](/client-libraries/python). | | | Python | [psycopg3](https://pypi.org/project/psycopg/) | | -| Ruby | [pg](https://github.com/ged/ruby-pg). See the [example guide](ruby.md). | 1.5.6 | +| Ruby | [pg](https://github.com/ged/ruby-pg). See the [example guide](/client-libraries/ruby). | 1.5.6 | | Rust | [rust-postgres](https://crates.io/crates/postgres) | | | Rust | [tokio-postgres](https://docs.rs/tokio-postgres/latest/tokio%5Fpostgres/) | 0.7 | | PHP | [pdo-pgsql](https://www.php.net/manual/en/ref.pdo-pgsql.php) | 8.3.2 | diff --git a/client-libraries/python.mdx b/client-libraries/python.mdx new file mode 100644 index 00000000..f634b6bb --- /dev/null +++ b/client-libraries/python.mdx @@ -0,0 +1,147 @@ +--- +title: Python +description: Describes how to use third-party Python drivers to interact with RisingWave. +--- +To interact with RisingWave from Python applications, we recommend using the [`risingwave-py`](https://pypi.org/project/risingwave-py/) SDK. 
+This SDK provides a simple way to perform ad-hoc queries, subscribe to changes, and define event handlers for tables and materialized views, making it easier to integrate real-time data into applications. For details, see [Python SDK](../python-sdk/intro). + +As RisingWave is wire-compatible with PostgreSQL, you can also use third-party PostgreSQL drivers like `psycopg2` and `sqlalchemy` to interact with RisingWave from your Python applications. + +## Use `psycopg2` to connect to RisingWave + +In this section, we use the [`psycopg2`](https://pypi.org/project/psycopg2/) driver to connect to RisingWave. + +### Run RisingWave + +To learn about how to run RisingWave, see [Run RisingWave](../get-started/quickstart.mdx). + + +### Install the `psgcopg2` driver + +For information about how to install `psycopg` and the difference between `psycopg` and `psycopg-binary`, see the [official psycopg documentation](https://www.psycopg.org/docs/install.html). + + +### Connect to RisingWave + +To connect to RisingWave via `psycopg2`: + +```python +import psycopg2 + +conn = psycopg2.connect(host="127.0.0.1", port=4566, user="root", dbname="dev") +``` + +### Create a source + +The code below creates a source `walk` with the `datagen` connector. The `datagen` connector is used to generate mock data. The `walk` source consists of two columns, `distance` and `duration`, which respectively represent the distance and the duration of a walk. The source is a simplified version of the data that is tracked by smart watches. + +```python +import psycopg2 + +conn = psycopg2.connect(host="localhost", port=4566, user="root", dbname="dev") # Connect to RisingWave. +conn.autocommit = True # Set queries to be automatically committed. + +with conn.cursor() as cur: + cur.execute(""" +CREATE TABLE walk(distance INT, duration INT) +WITH ( + connector = 'datagen', + fields.distance.kind = 'sequence', + fields.distance.start = '1', + fields.distance.end = '60', + fields.duration.kind = 'sequence', + fields.duration.start = '1', + fields.duration.end = '30', + datagen.rows.per.second='15', + datagen.split.num = '1' +) FORMAT PLAIN ENCODE JSON""") # Execute the query. + +conn.close() # Close the connection. +``` + +All the code examples in this guide include a section for connecting to RisingWave. If you perform multiple actions within one connection session, you do not need to repeat this section. + + + +### Create a materialized view + +The code in this section creates a materialized view `counter` to capture the latest total distance and duration. + +```python +import psycopg2 + +conn = psycopg2.connect(host="localhost", port=4566, user="root", dbname="dev") +conn.autocommit = True + +with conn.cursor() as cur: + cur.execute("""CREATE MATERIALIZED VIEW counter + AS SELECT + SUM(distance) as total_distance, + SUM(duration) as total_duration + FROM walk;""") + +conn.close() +``` + +### Query a materialized view + +The code in this section queries the materialized view `counter` to get real-time data. + +```python +import psycopg2 + +conn = psycopg2.connect(host="localhost", port=4566, user="root", dbname="dev") +conn.autocommit = True + +with conn.cursor() as cur: + cur.execute("SELECT * FROM counter;") + print(cur.fetchall()) +conn.close() +``` + +## Use `sqlalchemy` to connect to RisingWave + +In this section, we use the [SQLAlchemy](https://www.sqlalchemy.org) driver to connect to RisingWave. + +### Run RisingWave + +To learn about how to run RisingWave, see [Run RisingWave](../get-started/quickstart). 
+ +### Install necessary Python packages + +Ensure you have Python3 installed. + +For more information about `sqlalchemy`, see the [SQLAlchemy](https://www.sqlalchemy.org). Refer to the documentation version that corresponds to the version of SQLAlchemy that you run. + +For information about how to install `psycopg-binary`, see the [official psycopg documentation](https://www.psycopg.org/docs/install.html). + +```terminal +pip3 install SQLAlchemy sqlalchemy-risingwave psycopg2-binary +``` + +### Connect to RisingWave + +To connect to RisingWave via `sqlalchemy`: + +```python +DB_URI = 'risingwave+psycopg2://root@risingwave-standalone:4566/dev' + +engine = create_engine(DB_URI) +``` + +Note that RisingWave does not provide direct compatibility with `sqlaclehmy-postgres` so `risingwave+psycopg2` is used as the URI scheme. The rest of the URL follows the same format as the PostgreSQL driver. + +### Create a source + +The code below creates a table `users` using the engine created in the previous section. + +```python +with engine.connect() as conn: + conn.execute("""CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY, + name VARCHAR, + age INTEGER)""") +``` + +You can create materialized views and query from materialized views using the same format shown above. + diff --git a/sdks/ruby.mdx b/client-libraries/ruby.mdx similarity index 95% rename from sdks/ruby.mdx rename to client-libraries/ruby.mdx index eca28c93..5504f1fe 100644 --- a/sdks/ruby.mdx +++ b/client-libraries/ruby.mdx @@ -8,7 +8,7 @@ In this guide, we use the [`ruby-pg`](https://github.com/ged/ruby-pg) driver to ## Run RisingWave -To learn about how to run RisingWave, see [Run RisingWave](/get-started.md#run-risingwave). +To learn about how to run RisingWave, see [Run RisingWave](../get-started/quickstart.mdx). ## Install the `ruby-pg` driver @@ -25,11 +25,11 @@ require 'pg' conn = PG.connect(host: '127.0.0.1', port: 4566, dbname: 'dev', user: 'root') ``` -:::note + The `BasicTypeMapForResults` class isn't supported currently, you need to cast RisingWave types into Ruby types manually. -::: + ## Create a source @@ -57,11 +57,11 @@ EOF conn.exec(sql) # Execute the query. ``` -:::note + All the code examples in this guide include a section for connecting to RisingWave. If you perform multiple actions within one connection session, you do not need to repeat this section. -::: + ## Create a materialized view diff --git a/cloud/check-spending-details.mdx b/cloud/check-spending-details.mdx index 68e78c52..3dc0a89e 100644 --- a/cloud/check-spending-details.mdx +++ b/cloud/check-spending-details.mdx @@ -1,6 +1,6 @@ --- title: "Check spending details" -descriptin: You can view the usage and the corresponding charges for each project during the ongoing billing period. You can also download a PDF version of the invoice for your records. +description: You can view the usage and the corresponding charges for each project during the ongoing billing period. You can also download a PDF version of the invoice for your records. --- diff --git a/cloud/check-status-and-metrics.mdx b/cloud/check-status-and-metrics.mdx index 582d5558..2438d08c 100644 --- a/cloud/check-status-and-metrics.mdx +++ b/cloud/check-status-and-metrics.mdx @@ -28,8 +28,5 @@ The project details page includes: * [Database users](/cloud/manage-database-users/) -**TIP** - To specify the time range of the metrics, go to the **Metrics** tab, and click on **Last 30 minutes** in the top right corner to customize your time range. 
- diff --git a/cloud/choose-a-project-plan.mdx b/cloud/choose-a-project-plan.mdx index b236c751..d0ad11ef 100644 --- a/cloud/choose-a-project-plan.mdx +++ b/cloud/choose-a-project-plan.mdx @@ -8,7 +8,7 @@ Each project type is associated with the corresponding features, capabilities, r Currently, RisingWave Cloud offers three types of projects: **Trial**, **Standard**, and **Advanced**. The table below describes a high-level comparison of features and restrictions across three project types. | Service type | Trial | Standard | Advanced | -| ----------------- | --------------------------------------------- | ---------------------------------------------------------------- | ----------------------------------------------- | +| :---------------- | :-------------------------------------------- | :--------------------------------------------------------------- | :---------------------------------------------- | | Deployment type | Multi-tenancy deployment, single-node project | Multi-tenancy deployment, multi-node project | Multi-tenancy deployment, multi-node project | | Description | Standalone deployment with 2-RWU resources. | Deployed on shared Kubernetes service with customized resources. | Customized project deployment based on requests | | Pricing | Free | Pay-as-you-go | Customized, contact sales | diff --git a/cloud/connect-to-a-project.mdx b/cloud/connect-to-a-project.mdx index 3fc0cee3..0b6cd9d1 100644 --- a/cloud/connect-to-a-project.mdx +++ b/cloud/connect-to-a-project.mdx @@ -25,11 +25,9 @@ To connect with any local clients, follow the steps below: * RisingWave Cloud creates a default user for every provisioned project since v1.7.1\. The default user is authenticated with a temporary token under the OAuth 2.0 protocol to ease the burden on developers. For default users, RisingWave Cloud offers the `psql` command and a general `Connection String` for a quick connection. * Alternatively, you can create a new user, RisingWave Cloud offers `psql`, `Connection String`, `Parameters Only`, `Java`, `Node.js`, `Python`, and `Golang` as connection options. - -**NOTE** - + To connect via `psql`, you need to [Install psql](/docs/current/install-psql-without-postgresql/) in your environment. `psql` is a command-line interface for interacting with PostgreSQL databases, including RisingWave. - + 3. You may need to set up a CA certificate to enable SSL connections. See the instructions displayed on the portal for more details. 4. Copy the command and run it in a terminal window. @@ -39,11 +37,9 @@ To connect via `psql`, you need to [Install psql](/docs/current/install-psql-wit |----------------|---------------| | | | - -**NOTE** - + If you choose `Java`, `Node.js`, `Python`, or `Golang` as the startup mode, replace `` in the command with the password you set when creating a new user. - + ## What's next :@:/` directly. - -**NOTE** - + Not all clients support the `options` field. If your client does not support the `options` field, you can use solution 2 or 3. - + ### Solution 2: Put the tenant identifier in the host @@ -31,11 +29,9 @@ You can put the tenant identifier in the host in the format of `:@.:/ -**NOTE** - + Not all clients support SNI routing. If your client does not support SNI routing, you can use solution 1 or 3. 
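With a client that does support SNI routing, a minimal sketch of this solution using `psql` might look like the following. All values here are illustrative placeholders; use the tenant identifier, endpoint, port, and user shown for your project in the portal.

```bash
# Hypothetical example: the tenant identifier travels in the hostname,
# so the client must connect over SSL and send it via SNI.
psql "host=<tenant-id>.<endpoint> port=<port> dbname=dev user=<user> sslmode=require"
```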
- + ### Solution 3: Put the tenant identifier in the username[](#solution-3-put-the-tenant-identifier-in-the-username "Direct link to Solution 3: Put the tenant identifier in the username") @@ -45,11 +41,9 @@ You can also put the tenant identifier in the username in the format of `;:@:/ -**NOTE** - + The server will use `AuthenticationCleartextPassword` response to authenticate the user. Learn more about the protocol in the [PostgreSQL documentation](https://www.postgresql.org/docs/current/protocol-flow.html). - + ## The tenant identifier is not specified @@ -58,7 +52,7 @@ This error occurs when your Postgres client doesn't support Server Name Indicati RisingWave Cloud has tested the following drivers for SNI support: | Driver | Language | SNI support | Note | -| ---------- | -------- | ----------- | -------------------------------------------- | +| :--------- | :------- | :---------- | :------------------------------------------- | | psycopg2 | Python | | Requires the 'sslmode': 'require' option | | asyncpg | Python | | Requires the 'sslmode': 'verify-full' option | | SQLAlchemy | Python | | Requires the 'sslmode': 'require' option | diff --git a/cloud/create-a-connection.mdx b/cloud/create-a-connection.mdx index 049c7b3a..a9fbda09 100644 --- a/cloud/create-a-connection.mdx +++ b/cloud/create-a-connection.mdx @@ -51,8 +51,6 @@ You can find it in the [Azure Portal](https://portal.azure.com/) → **Private l 5. Click **Confirm** to create the connection. -**TIP** - For inquiries about PrivateLink for Confluent private Kafka clusters, please reach out to our [support team](mailto:cloud-support@risingwave-labs.com) first. We will handle these manual steps: * Before provisioning a RisingWave PrivateLink, ensure the cluster's Availability Zones (AZs) are a subset of the AZs offered by RisingWave. diff --git a/cloud/export-metrics.mdx b/cloud/export-metrics.mdx index 33db2bc6..a6612c53 100644 --- a/cloud/export-metrics.mdx +++ b/cloud/export-metrics.mdx @@ -4,8 +4,6 @@ description: "This article describes how to use metrics API to export metrics fr --- -**TIP** - By default, a newly created service account isn't given any permissions. To export metrics, you need to grant your service account the `ProjectAdmin` role. See [RBAC](/cloud/organization-rbac/#role-permissions-and-limitations) for more details. @@ -18,7 +16,7 @@ Generate the API key ID and API key secret in the Cloud Portal. See [Generate an Get the corresponding `CLOUD_HOST` for your region and Cloud provider from the table below: | Region/CloudProvider | CLOUD\_HOST | -| -------------------- | ------------------------------------ | +| :------------------- | :----------------------------------- | | useast2/aws | canary-useast2-mgmt.risingwave.cloud | | us-east-1/aws | prod-aws-usea1-mgmt.risingwave.cloud | | us-west-2/aws | prod-aws-uswe2-mgmt.risingwave.cloud | @@ -31,11 +29,9 @@ Get the corresponding `CLOUD_HOST` for your region and Cloud provider from the t Choose one of the following methods to configure monitoring systems. - -**NOTE** - + The metrics are formatted according to [Prometheus](https://prometheus.io/docs/concepts/metric%5Ftypes/) standards. If your monitoring collection mode is compatible with the Prometheus format, refer to the Prometheus section below to configure the collection. 
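As a rough sketch of such a setup, a Prometheus scrape job could authenticate with the API key and pull from the metrics endpoint. The job name, metrics path, and placeholders below are illustrative assumptions only; use the exact endpoint and settings described in the Prometheus section below and shown in the Cloud Portal.

```yaml
scrape_configs:
  - job_name: risingwave-cloud-metrics     # illustrative job name
    scheme: https
    metrics_path: /<metrics-endpoint-path>  # placeholder: use the path provided by RisingWave Cloud
    basic_auth:
      username: <API_KEY_ID>      # API key ID generated in the Cloud Portal
      password: <API_KEY_SECRET>  # API key secret generated in the Cloud Portal
    static_configs:
      - targets: ["<CLOUD_HOST>"] # for example, prod-aws-usea1-mgmt.risingwave.cloud
```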
- + diff --git a/cloud/manage-payment-methods.mdx b/cloud/manage-payment-methods.mdx index a1afc858..97ebfbc6 100644 --- a/cloud/manage-payment-methods.mdx +++ b/cloud/manage-payment-methods.mdx @@ -1,13 +1,12 @@ --- title: "Manage payment methods" -description: "You can manage your payment methods for your organization in RisingWave Cloud. The saved payment methods will be used to make automatic payments of the monthly bill after each billing period. You can also use the saved payment methods to settle outstanding payments manually if the automatic payment fails." --- +You can manage your payment methods for your organization in RisingWave Cloud. The saved payment methods will be used to make automatic payments of the monthly bill after each billing period. You can also use the saved payment methods to settle outstanding payments manually if the automatic payment fails. All members of the organization can view and manage payment methods." + -**INFO** +Currently, RisingWave Cloud only supports credit cards as the payment method. -* Currently, RisingWave Cloud only supports credit cards as the payment method. -* All members of the organization can view and manage payment methods. ## Add a payment method diff --git a/cloud/manage-sources.mdx b/cloud/manage-sources.mdx index e6b276e3..c8971ea1 100644 --- a/cloud/manage-sources.mdx +++ b/cloud/manage-sources.mdx @@ -15,11 +15,11 @@ You can create a source with one of the following methods: 2. Specify the project and click its **Workspace**. 3. Next to **Source** tab, click **\+ Add new**. 4. Select the service you want to connect to. - -**NOTE** + -More services will be supported in future releases. - + More services will be supported in future releases. + + 5. Configure the connector settings, source details, and schema according to the instructions of the guided setup. 6. Check the generated SQL statement and click **Confirm** to create the source in your database. @@ -32,7 +32,6 @@ Refer to [CREARE SOURCE](/docs/current/sql-create-source/#supported-sources) in Click on a source to view its details, including the connector settings, schema, throughput, errors, and running status. -**TIP** When checking throughput and errors, you can click **Last 30 minutes** on the right side to customize your time range. diff --git a/cloud/manage-users.mdx b/cloud/manage-users.mdx index 69054da7..9c25301a 100644 --- a/cloud/manage-users.mdx +++ b/cloud/manage-users.mdx @@ -22,11 +22,9 @@ You can invite others to create a RisingWave Cloud account and join your organiz 2. Click **Invite new user**. 3. Enter the email address of the user you want to invite. - -**NOTE** - + You cannot invite an existing user (whose email address is already registered on RisingWave Cloud) to join your organization. - + 1. Click **Send invite**. diff --git a/cloud/manage-your-account.mdx b/cloud/manage-your-account.mdx index 0be789ba..f0c7ea5f 100644 --- a/cloud/manage-your-account.mdx +++ b/cloud/manage-your-account.mdx @@ -30,11 +30,9 @@ To delete your RisingWave Cloud account and all its associated data: 3. Check **Yes, I want to delete the account**. 4. Click **Delete** to confirm the decision. - -**NOTE** - + If you are the last admin of the organization, deleting your account will also delete the organization. 
- + ## Switch accounts diff --git a/cloud/organization-rbac.mdx b/cloud/organization-rbac.mdx index 2153d911..63946482 100644 --- a/cloud/organization-rbac.mdx +++ b/cloud/organization-rbac.mdx @@ -18,7 +18,7 @@ Below are permissions and limitations for roles to ensure that each service acco To grant a role to your account, go to [Organization](https://www.risingwave.cloud/project/organization/service-account/) \> Role management, then click **Edit roles** on the right side. | Role | Permissions | Limitations | -| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | OrganizationAdmin | Full control over tenants and related resources.Management of service accounts, users, invitations, and RoleBinding.Access to all billing resources. | Cannot modify their own admin RoleBinding. | | OrganizationMember | View access to all tenants.View service accounts, users, and invitations. | No permissions for tenant-related operations (create, update, delete).No permissions for service accounts, users, or invitations operations (create, update, delete).No access to billing resources. | | BillingManager | Full access to all billing resources. | No access to any other operations outside of billing. | @@ -35,15 +35,13 @@ Only the OrganizationAdmin has the permission to manage user's RoleBinding. ### Scenario | User scenarios | Description | -| ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Invite a user to the organization | Currently, you can only invite a new user as an OrganizationMember. If you want to grant more permissions to the target user, please go to **Organization** \> **Role management** \> **Users** to modify after the user accepts the invitation. | | Create a service account in the organization | The service account RoleBinding is used for authorization when accessing Cloud APIs using the service account's API keys. By default, the service account is assigned the read-only OrganizationMember role. 
If you need to assign more permissions to the service account, please go to **Organization** \> **Role management** \> **Service Accounts** to add other roles. | | Delete or add RoleBinding for a user | Go to **Organization** \> **Role management** \> **Users**, click the corresponding Edit Roles of the specific role. A popup window will appear, allowing you to uncheck the role or select the new ones. Click **Confirm** to save the change. | | Delete or add RoleBinding for the service account | Go to **Organization** \> **Role management** \> **Users**, click the corresponding Edit Roles of the specific service account. A popup window will appear, allowing you to uncheck the role or select the new ones. Click **Confirm** to save the change. | - -**NOTE** - + Every organization needs at least one OrganizationAdmin user. Any attempt to delete the last OrganizationAdmin RoleBinding will fail. - + diff --git a/cloud/pricing.mdx b/cloud/pricing.mdx index 514f5e5a..07b96b9b 100644 --- a/cloud/pricing.mdx +++ b/cloud/pricing.mdx @@ -8,7 +8,7 @@ description: RisingWave Cloud offers a flexible pricing model based on your usag RisingWave Cloud charges the cost of each project individually. The pricing model of each project varies depending on its plan. | Plan | Pricing model | Pricing precision | -| -------------------------- | ------------- | ----------------- | +| :------------------------- | :------------ | :---------------- | | [Trial](#trial-plan) | Free | / | | [Standard](#standard-plan) | Pay-as-you-go | 30-second basis | | [Advanced](#advanced-plan) | Contact sales | Contact sales | diff --git a/cloud/privatelink-overview.mdx b/cloud/privatelink-overview.mdx index acd3c3e3..73db4ec8 100644 --- a/cloud/privatelink-overview.mdx +++ b/cloud/privatelink-overview.mdx @@ -12,11 +12,9 @@ RisingWave Cloud utilizes the private connection capability of the underlying Cl * [GCP Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect) * [Azure Private Link](https://learn.microsoft.com/en-us/azure/private-link/) - -**NOTE** - + Azure Private Link integration is currently in development and will be available soon. - + The diagram below depicts a high-level overview of how PrivateLink service works. All three platforms share the same pattern of network structure so that you can configure them in the same way automatically. diff --git a/cloud/project-byoc.mdx b/cloud/project-byoc.mdx index ff98f203..4e5ab6a8 100644 --- a/cloud/project-byoc.mdx +++ b/cloud/project-byoc.mdx @@ -19,8 +19,6 @@ Follow the steps below to create your own cloud environment. 3. Once you've redeemed the invitation code, select **BYOC** as the deployment type, and select your cloud platform as AWS or GCP (see [Resource and permission](#resource-and-permission) for more details), region, and ID as necessary. 4. After configuring these settings, you'll see additional instructions on your screen. Follow these steps to establish your BYOC environment. Please be aware that the final command `rwc byoc apply --name xxx` may take 30 to 40 minutes to complete, and a progress bar will be shown to keep you updated. During this time, it's crucial to ensure a stable internet connection. If the command is interrupted or fails due to network instability, you can safely retry it. -**TIP** - When you run the command `rwc byoc apply --name xxx`, it will deploy some resources in your AWS/GCP/Azure environment, such as AWS S3/Google Cloud Storage/Azure Blob Storage and EKS/GKE/AKS clusters. 
Please do not modify the configuration of these resources. If you encounter any issues during this process, please contact our [support team](mailto:cloud-support@risingwave-labs.com). 5. Click **Next** to continue the configuration of cluster size and nodes. To learn more about the nodes, see the [architecture of RisingWave](/docs/current/architecture/). @@ -66,6 +64,8 @@ You need to enable the following APIs to create or delete a BYOC environment: * **Cloud DNS API** for VPC private service connect setup. * **Kubernetes Engine API** for provisioning the GKE cluster the data plane is hosted. * **Cloud Resource Manager API** for IAM provisioning. + * **Service Networking API** for Cloud SQL (as meta store) connection. + * **Cloud SQL Admin API** for Cloud SQL (as meta store) provisioning. * **Required permission for BYOC environment creation/deletion** Before running the command-line interface to create or delete a BYOC environment, you need to have a Google IAM (IAM user/Service account) with the following roles. * [Compute Network Admin](https://cloud.google.com/iam/docs/understanding-roles#compute.networkAdmin) @@ -76,11 +76,11 @@ Before running the command-line interface to create or delete a BYOC environment * [Service Account Admin](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountAdmin) * [Service Account User](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) * [Storage Admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) - -**NOTE** + These permissions are only required for creating or deleting a BYOC environment. Once the environment is up and running, limited permissions are needed to operate the services. - + + * **Resources provisioned in BYOC environment** We will set up the following resources in a BYOC environment: * 1 VPC: including VPC, its subnets, firewalls, IPs to host all BYOC resources. diff --git a/cloud/review-and-pay-invoices.mdx b/cloud/review-and-pay-invoices.mdx index 65d8b880..53021da0 100644 --- a/cloud/review-and-pay-invoices.mdx +++ b/cloud/review-and-pay-invoices.mdx @@ -40,7 +40,7 @@ The automatic payment process could fail if your payment methods are invalid or To pay an invoice manually, click the **Pay Now** button on the invoice you want to pay and follow the instructions on the screen. -**CAUTION** + If you don't settle the outstanding payments within 10 days of the "Overdue" period, all services that you’re currently using will be stopped and you won't be able to create any new projects. You have to contact the support team to resume the services. diff --git a/cloud/scale-a-project-manually.mdx b/cloud/scale-a-project-manually.mdx index 24176899..486c21ea 100644 --- a/cloud/scale-a-project-manually.mdx +++ b/cloud/scale-a-project-manually.mdx @@ -3,11 +3,9 @@ title: "Scale a project manually" description: "After creating a project, you have the flexibility to scale its resources and capacity to meet your specific data processing and analysis needs. This can be achieved through two methods: increasing or decreasing the number of worker nodes (horizontal scaling) or adjusting the resource capacity of each node in the project (vertical scaling)." --- - -**NOTE** - + You can scale the projects created in the Standard plan and the Advanced plan. The Trial plan has a fixed number of nodes and resources. 
- + ## Scale your project diff --git a/cloud/service-account.mdx b/cloud/service-account.mdx index 9b17023f..a9891d01 100644 --- a/cloud/service-account.mdx +++ b/cloud/service-account.mdx @@ -41,7 +41,7 @@ You can generate multiple API keys for a service account. Each API key will have 3. A new API key will be generated. Please save the secret along with the ID to a safe place. -**CAUTION** + The secret will be shown only once. After you close the dialog, you won't be able to retrieve it. If you lose the secret, you have to generate a new API key. diff --git a/cloud/sso.mdx b/cloud/sso.mdx index abd40c34..daccf33b 100644 --- a/cloud/sso.mdx +++ b/cloud/sso.mdx @@ -22,7 +22,7 @@ During the setup, provide placeholder values for the following fields: Configure the properties below on the IdP platform: | Property | Description | -| -------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **IdP Single Sign-On URL** | URL of the receiver of the SAML AuthNRequest. Use a placeholder value initially. You'll get the actual value from your IdP after providing it with the Atlas metadata. | | **IdP Signature Certificate** | PEM-encoded public key certificate of the IdP. You can obtain this value from your IdP. You can either upload the certificate from your computer or paste the contents into a text box. | | **Request Binding** | SAML Authentication Request Protocol binding used to send the AuthNRequest. It can be either **HTTP POST** or **HTTP REDIRECT**. | diff --git a/cloud/stop-and-delete-projects.mdx b/cloud/stop-and-delete-projects.mdx index 61c0f15d..108a67af 100644 --- a/cloud/stop-and-delete-projects.mdx +++ b/cloud/stop-and-delete-projects.mdx @@ -22,8 +22,6 @@ Please ensure that all critical tasks are safely paused before proceeding. You c If you no longer need a project and its associated data, you can delete it to free up resources. -**INFO** - You must delete all projects before [deleting your account](/cloud/manage-your-account/#delete-your-account). diff --git a/cloud/update-database-version.mdx b/cloud/update-database-version.mdx index 710e7e48..5199a3b2 100644 --- a/cloud/update-database-version.mdx +++ b/cloud/update-database-version.mdx @@ -17,8 +17,6 @@ Before the upgrade, ensure that all critical data are backed up and all critical 2. Click the rocket icon next to the project you want to update the database version. 3. Wait for the update to complete. This may take a few minutes. - -**NOTE** - + You can only update the RisingWave version of a project to a newer version. You cannot downgrade it. - + diff --git a/delivery/overview.mdx b/delivery/overview.mdx index 22e4372b..1b8f1738 100644 --- a/delivery/overview.mdx +++ b/delivery/overview.mdx @@ -6,7 +6,7 @@ sidebarTitle: Overview To stream data out of RisingWave, you must create a sink. A sink is an external target that you can send data to. Use the [CREATE SINK](/docs/current/sql-create-sink/) statement to create a sink. You need to specify what data to be exported, the format, and the sink parameters. -Sinks become visible right after you create them, regardless of the backfilling status. 
Therefore, it's important to understand that the data in the sinks may not immediately reflect the latest state of their upstream sources due to the latency of the sink, connector, and backfilling process. To determine whether the process is complete and the data in the sink is consistent, refer to [Monitor statement progress](/docs/current/view-statement-progress/). +Sinks become visible right after you create them, regardless of the backfilling status. Therefore, it's important to understand that the data in the sinks may not immediately reflect the latest state of their upstream sources due to the latency of the sink, connector, and backfilling process. To determine whether the process is complete and the data in the sink is consistent, refer to [Monitor statement progress](/docs/current/monitor-statement-progress/). Currently, RisingWave supports the following sink connectors: @@ -94,15 +94,15 @@ When creating an `upsert` sink, note whether or not you need to specify the prim * If the downstream system supports primary keys but the table in the downstream system has no primary key, then RisingWave does not allow users to create an upsert sink. A primary key must be defined in the table in the downstream system. * If the downstream system does not support primary keys, then users must define the primary key when creating an upsert sink. -## Sink data in parquet format +## Sink data in parquet or json encode **PUBLIC PREVIEW** -This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). +Sink data in parquet encode is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). -RisingWave supports sinking data in Parquet format to file systems including S3, Google Cloud Storage (GCS), and Azure Blob Storage. This eliminates the need for complex data lake setups. Once the data is saved, the files can be queried using the batch processing engine of RisingWave through the `file_scan` API. You can also leverage third-party OLAP query engines for further data processing. +RisingWave supports sinking data in Parquet or JSON encode to file systems including S3, Google Cloud Storage (GCS), Azure Blob Storage, and WebHDFS. This eliminates the need for complex data lake setups. Once the data is saved, the files can be queried using the batch processing engine of RisingWave through the `file_scan` API. You can also leverage third-party OLAP query engines for further data processing. Below is an example to sink data to S3: @@ -121,9 +121,6 @@ WITH ( ) FORMAT PLAIN ENCODE PARQUET(force_append_only='true'); ``` - -**NOTE** - + File sink currently supports only append-only mode, so please change the query to `append-only` and specify this explicitly after the `FORMAT ... ENCODE ...` statement. 
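For reference, sinking in JSON encode follows the same pattern as the Parquet example above; only the `ENCODE` clause changes. The sketch below assumes the same S3 connector parameters as the Parquet example, with placeholder names and values — check the S3 sink connector page for the exact parameter list.

```sql
CREATE SINK file_sink_json AS SELECT * FROM t
WITH (
    connector = 's3',
    s3.region_name = '<AWS region>',
    s3.bucket_name = '<S3 bucket>',
    s3.path = '<S3 path>',
    s3.credentials.access = '<S3 access key>',
    s3.credentials.secret = '<S3 secret key>'
) FORMAT PLAIN ENCODE JSON(force_append_only='true');
```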
- - + diff --git a/delivery/risingwave-as-postgres-fdw.mdx b/delivery/risingwave-as-postgres-fdw.mdx index cd8e5d4a..5b2ff019 100644 --- a/delivery/risingwave-as-postgres-fdw.mdx +++ b/delivery/risingwave-as-postgres-fdw.mdx @@ -146,18 +146,16 @@ SELECT * FROM city_population; seattle | 2 ``` - -**NOTE** - + Currently, write operations to RisingWave through a foreign data wrapper are not supported. The data in the foreign table is read-only. - + ## Differences between sinking to Postgres and using FDW in Postgres There are two main methods to interact between RisingWave and PostgreSQL: sinking data to PostgreSQL and utilizing a foreign data wrapper of PostgreSQL to access data in RisingWave. The table below provides a summary of the differences between these two methods. Your choice between these methods will depend on your specific requirements, data architecture, and performance considerations. | Aspect | Sinking to PostgreSQL | Using PostgreSQL FDW to access data | -| -------------------------- | ------------------------------------------------------- | ------------------------------------------------------------------ | +| :------------------------- | :------------------------------------------------------ | :----------------------------------------------------------------- | | Data Access | Data is physically stored in PostgreSQL | Data is physically stored in RisingWave | | Performance | Potential latency for RisingWave to write to PostgreSQL | Potential latency when reading data from RisingWave | | Message Delivery Guarantee | At-least-once while sinking into PostgreSQL tables | Exactly-once for MVs and the data is not moved | diff --git a/delivery/subscription.mdx b/delivery/subscription.mdx index 8902cf9e..d22bc6ca 100644 --- a/delivery/subscription.mdx +++ b/delivery/subscription.mdx @@ -77,12 +77,10 @@ If you specify `FULL` instead of the `since_clause`, the subscription cursor sta ### Fetch from cursor - -**NOTE** - + FETCH from cursor function is supported in the PSQL simple query mode and extended mode. + - #### Non-blocking data fetch ```sql diff --git a/demos/real-time-ad-performance-analysis.mdx b/demos/real-time-ad-performance-analysis.mdx index 230a75cc..bd930b5d 100644 --- a/demos/real-time-ad-performance-analysis.mdx +++ b/demos/real-time-ad-performance-analysis.mdx @@ -105,8 +105,6 @@ CREATE SOURCE ad_click ( ``` -**TIP** - `scan.startup.mode = 'earliest'` means the source will start streaming from the earliest entry in Kafka. Internally, RisingWave will record the consumed offset in the persistent state so that during a failure recovery, it will resume from the last consumed offset. @@ -209,8 +207,6 @@ FROM ``` -**TIP** - You can easily build a CTR live dashboard on top of `ad_ctr_5min`. The CTR value is dynamically changing and every ad CTR in a given window can be drawn as a plot in the line chart. Eventually, we are able to analyze how CTR changes over time. 
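To feed such a dashboard, you can simply query the materialized view repeatedly. The sketch below assumes the `ad_id`, `ctr`, and `window_end` columns defined for `ad_ctr_5min` earlier in this tutorial, and uses an arbitrary ad ID for illustration.

```sql
-- Plot how CTR evolves over time for a single ad.
SELECT ad_id, ctr, window_end
FROM ad_ctr_5min
WHERE ad_id = 9
ORDER BY window_end;
```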
diff --git a/deploy/node-specific-configurations.mdx b/deploy/node-specific-configurations.mdx index d91b2321..16c46e97 100644 --- a/deploy/node-specific-configurations.mdx +++ b/deploy/node-specific-configurations.mdx @@ -87,7 +87,7 @@ unsafe_enable_strict_consistency = true RisingWave now supports the following configurations: | Configuration | Default | Description | -| ----------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :---------------------------------- | :------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | unsafe\_enable\_strict\_consistency | true | Control the strictness of stream consistency. When set to false, data inconsistency like double-insertion or double-deletion with the same primary keys will be tolerated. | ### Storage configurations @@ -129,7 +129,7 @@ The configuration of the file cache and the block cache refilling is separated i Below is an example of the data file cache configuration for your reference. Please be aware that the data file cache configuration and the meta file cache configuration share the same options. | Configuration | Default | Description | -| ----------------------- | ------- | ----------------------------------------------------------------------------------------------------------------- | +| :---------------------- | :------ | :---------------------------------------------------------------------------------------------------------------- | | dir | "" | The directory for the file cache. If left empty, the file cache will be disabled. | | capacity\_mb | 1024 | The file cache capacity in MB. | | file\_capacity\_mb | 64 | The capacity for each cache file in MB. | @@ -147,7 +147,7 @@ RisingWave uses a recent filter to decide whether to fill a block or unit. The r Below is an example of the cache refill configuration for your reference. | Configuration | Default | Description | -| ------------------------------------ | ------- | ------------------------------------------------------------------------------------- | +| :----------------------------------- | :------ | :------------------------------------------------------------------------------------ | | data\_refill\_levels | \[\] | Only blocks in the given levels will be refilled. | | timeout\_ms | 6000 | The metadata update will be delayed at most timeout\_ms to wait for refilling. | | concurrency | 10 | Block refilling concurrency (by unit level). | diff --git a/deploy/risingwave-k8s-helm.mdx b/deploy/risingwave-k8s-helm.mdx index 0dc3b758..d4cba9e2 100644 --- a/deploy/risingwave-k8s-helm.mdx +++ b/deploy/risingwave-k8s-helm.mdx @@ -47,11 +47,10 @@ Customize your configuration for the RisingWave deployment by editing the [value * **Customize meta store**: The meta store in RisingWave holds metadata for cluster operations. See [Configuration](https://github.com/risingwavelabs/helm-charts/blob/main/docs/CONFIGURATION.md#customize-meta-store) for all the available options and [Examples](https://github.com/risingwavelabs/helm-charts/tree/main/examples/meta-stores) for detailed usage of meta stores. * **Customize state store**: The state store in RisingWave serves as a fault-tolerant storage system for preserving system state. 
See [Configuration](https://github.com/risingwavelabs/helm-charts/blob/main/docs/CONFIGURATION.md#customize-state-store) for all the available options and [Examples](https://github.com/risingwavelabs/helm-charts/tree/main/examples/state-stores) for detailed usage of state stores. * **Bundled PostgreSQL and MinIO**: If you want to use `PostgreSQL` as the meta store and `MinIO` as the state store, the Helm chart for RisingWave offers the option to bundle them together. This allows for a quick and easy setup of the Helm chart. See [Configuration](https://github.com/risingwavelabs/helm-charts/blob/main/docs/CONFIGURATION.md#bundled-etcdpostgresqlminio-as-stores) for more details. To enable this feature, set `tags.bundle=true`. - -**NOTE** + Before using the bundled `PostgreSQL` and `MinIO`, and any local stores, ensure that you have implemented the [Dynamic Volume Provisioning](https://kubernetes.io/docs/concepts/storage/dynamic-provisioning/). - + Install the latest RisingWave Helm chart: diff --git a/deploy/risingwave-kubernetes.mdx b/deploy/risingwave-kubernetes.mdx index b58fef8a..be451032 100644 --- a/deploy/risingwave-kubernetes.mdx +++ b/deploy/risingwave-kubernetes.mdx @@ -18,14 +18,11 @@ Ensure that [Docker](https://docs.docker.com/desktop/) is installed in your envi ## Create a Kubernetes cluster - -**INFO** - + The steps in this section are intended for creating a Kubernetes cluster in your local environment. If you are using a managed Kubernetes service such as AKS, GKE, and EKS, refer to the corresponding documentation for instructions. - + -**Steps:** @@ -57,7 +54,6 @@ Before the deployment, ensure that the following requirements are satisfied. * `kubectl` version ≥ 1.18 * For Linux, set the value of the `sysctl` parameter [net.ipv4.ip\_forward](https://linuxconfig.org/how-to-turn-on-off-ip-forwarding-in-linux) to 1. -**Steps:** [Install cert-manager](https://cert-manager.io/docs/installation/) and wait a minute to allow for initialization. @@ -76,19 +72,17 @@ kubectl apply --server-side -f https://github.com/risingwavelabs/risingwave-oper ``` **Compatibility table** | Operator | RisingWave | Kubernetes | -| -------- | ---------- | ---------- | +| :------- | :--------- | :--------- | | v0.4.0 | v0.18.0+ | v1.21+ | | v0.3.6 | v0.18.0+ | v1.21+ | You can find the release notes of each version [here](https://github.com/risingwavelabs/risingwave-operator/releases). - -**NOTE** - + The following errors might occur if `cert-manager` is not fully initialized. Simply wait for another minute and rerun the command above. ```bash Error from server (InternalError): Internal error occurred: failed calling webhook "webhook.cert-manager.io": failed to call webhook: Post "": dial tcp 10.105.102.32:443: connect: connection refused ``` - + _**Optional:**_ Check if the Pods are running. @@ -183,11 +177,9 @@ spec: - -**NOTE** - + The performance of MinIO is closely tied to the disk performance of the node where it is hosted. We have observed that AWS EBS does not perform well in our tests. For optimal performance, we recommend using S3 or a compatible cloud service. - + ```yaml spec: stateStore: @@ -353,14 +345,14 @@ core-site.xml hdfs-site.xml ``` -1. Next, create a ConfigMap, where `hadoop-conf` is the name of ConfigMap: +2. Next, create a ConfigMap, where `hadoop-conf` is the name of ConfigMap: ```bash kubectl create configmap hadoop-conf --from-file $HADOOP_HOME/etc/hadoop ``` -1. Then mount the Hadoop configuration files using this ConfigMap: +3. 
Then mount the Hadoop configuration files using this ConfigMap: ```yaml @@ -486,7 +478,6 @@ You can check the status of the RisingWave instance by running the following com ```bash kubectl get risingwave - ``` If the instance is running properly, the output should look like this: @@ -502,8 +493,6 @@ risingwave True postgresql S3 30s By default, the Operator creates a service for the frontend component, through which you can interact with RisingWave, with the type of `ClusterIP`. But it is not accessible outside Kubernetes. Therefore, you need to create a standalone Pod for PostgreSQL inside Kubernetes. -**Steps:** - ```bash @@ -525,8 +514,6 @@ psql -h risingwave-frontend -p 4567 -d dev -U root You can connect to RisingWave from Nodes such as EC2 in Kubernetes -**Steps:** - 1. In the `risingwave.yaml` file that you use to deploy the RisingWave instance, add a `frontendServiceType` parameter to the configuration of the RisingWave service, and set its value to `NodePort`. ```bash # ... @@ -548,8 +535,6 @@ psql -h ${RISINGWAVE_HOST} -p ${RISINGWAVE_PORT} -d dev -U root If you are using EKS, GCP, or other managed Kubernetes services provided by cloud vendors, you can expose the Service to the public network with a load balancer in the cloud. -**Steps:** - 1. In the `risingwave.yaml` file that you use to deploy the RisingWave instance, add a `frontendServiceType` parameter to the configuration of the RisingWave service, and set its value to `LoadBalancer`. ```bash # ... diff --git a/deploy/upgrade-risingwave-k8s.mdx b/deploy/upgrade-risingwave-k8s.mdx index 8b7a195e..5b3a3bc4 100644 --- a/deploy/upgrade-risingwave-k8s.mdx +++ b/deploy/upgrade-risingwave-k8s.mdx @@ -5,16 +5,14 @@ description: "This topic describes upgrade RisingWave in a K8s deployment with t --- -**CAUTION** + When upgrading RisingWave, it's important to be aware that there may be breaking changes. If you require technical support during the process of upgrading RisingWave in your production environments, please don't hesitate to reach out to us. - -**NOTE** - + Assuming that the Kubernetes namespace is `default`, if your RisingWave cluster is deployed in another namespace, please add the `-n ` argument to the `kubectl` and `helm` commands below. Remember to replace the `` with your own namespace. 
- + ## Upgrade RisingWave with Helm[](#upgrade-risingwave-with-helm "Direct link to Upgrade RisingWave with Helm") diff --git a/faq/risingwave-flink-comparison.mdx b/faq/risingwave-flink-comparison.mdx index e9ff72cd..4b207455 100644 --- a/faq/risingwave-flink-comparison.mdx +++ b/faq/risingwave-flink-comparison.mdx @@ -9,7 +9,7 @@ We periodically update this article to keep up with the rapidly evolving landsca ## Summary | Apache Flink | RisingWave | | -| -------------------------------- | --------------------------------------------------------------------- | -------------------------------------------------------------------------- | +| :------------------------------- | :-------------------------------------------------------------------- | :------------------------------------------------------------------------- | | Version | 1.17 | Latest version | | License | Apache License 2.0 | Apache License 2.0 | | System category | Stream processing framework | Streaming database | diff --git a/get-started/quickstart.mdx b/get-started/quickstart.mdx index 93f95c10..f801e8cb 100644 --- a/get-started/quickstart.mdx +++ b/get-started/quickstart.mdx @@ -6,22 +6,16 @@ description: "This guide aims to provide a quick and easy way to get started wit ## Step 1: Start RisingWave - -**INFO** - The following options start RisingWave in the standalone mode. In this mode, data is stored in the file system and the metadata is stored in the embedded SQLite database. See [About RisingWave standalone mode](#about-risingwave-standalone-mode) for more details. For extensive testing or single-machine deployment, consider [starting RisingWave via Docker Compose](/docs/current/risingwave-docker-compose/). For production environments, consider [RisingWave Cloud](/docs/current/risingwave-cloud/), our fully managed service, or [deployment on Kubernetes using the Operator](/docs/current/risingwave-kubernetes/) or [Helm Chart](/docs/current/risingwave-k8s-helm/). - - - ### Script installation Open a terminal and run the following `curl` command. ```bash -curl https://risingwave.com/sh | sh +curl -L https://risingwave.com/sh | sh ``` To start a RisingWave instance, run the following command. diff --git a/get-started/rw-premium-edition-intro.mdx b/get-started/rw-premium-edition-intro.mdx index 53fc4901..3dd36074 100644 --- a/get-started/rw-premium-edition-intro.mdx +++ b/get-started/rw-premium-edition-intro.mdx @@ -38,7 +38,11 @@ The Premium Edition features are only available to users who have purchased a li ### Set license key -To set your license key: +There are two primary methods for setting the license key in your environment: + +#### Manual method + +To set your license key manually: 1. Before launching a new cluster: * Add `system.license_key` to your TOML configuration file, or @@ -48,6 +52,15 @@ To set your license key: ALTER SYSTEM SET license_key TO '...'; ``` +#### Automated method + +To set your license key automatically: + +1. Use the `--license-key-path` CLI option for the meta node to monitor and reload the license key from a specified file. This streamlines license key rotation in Cloud environments. +2. Alternatively, set the `RW_LICENSE_KEY_PATH` environment variable. + +The `--license-key-path` CLI option is only available for the meta node, as the license key is propagated to other nodes through system parameters. 
When the `--license-key-path` option is specified, any manual configuration of the license key through system parameters (`license_key`), the initial configuration (`system.license_key`), or the `RW_LICENSE_KEY` environment variable will be rejected. + ### Verify license key To check if your license key is valid, run: @@ -58,12 +71,15 @@ SELECT rw_test_paid_tier(); A result of `t` means the key is valid; an error message indicates an invalid key. + + + ## Support RisingWave Premium edition offers the premium support: -| Support feature | Standard | Premium | -| ------------------------ | ------------------ | ----------------- | +| **Support feature** | **Standard** | **Premium** | +| :--- | :--- | :--- | | Service hours | 12x5 | 24x7 | | Response time | Critical - 4 hours | Critical - 1 hour | | | High - 12 hours | High - 4 hours | diff --git a/images/Backpressure-panel copy.png b/images/Backpressure-panel copy.png deleted file mode 100644 index 363ab7b6..00000000 Binary files a/images/Backpressure-panel copy.png and /dev/null differ diff --git a/images/Streaming-performance.png b/images/actors-and-fragments.png similarity index 100% rename from images/Streaming-performance.png rename to images/actors-and-fragments.png diff --git a/images/add-a-database copy.png b/images/add-a-database copy.png deleted file mode 100644 index c11bf05b..00000000 Binary files a/images/add-a-database copy.png and /dev/null differ diff --git a/images/apply-to-database copy.png b/images/apply-to-database copy.png deleted file mode 100644 index 680cb0d2..00000000 Binary files a/images/apply-to-database copy.png and /dev/null differ diff --git a/images/auto-refresh copy.png b/images/auto-refresh copy.png deleted file mode 100644 index 69ac14da..00000000 Binary files a/images/auto-refresh copy.png and /dev/null differ diff --git a/images/beekeeper-connection copy.png b/images/beekeeper-connection copy.png deleted file mode 100644 index 1f79d395..00000000 Binary files a/images/beekeeper-connection copy.png and /dev/null differ diff --git a/images/binlog-format copy.png b/images/binlog-format copy.png deleted file mode 100644 index a2f1e4b1..00000000 Binary files a/images/binlog-format copy.png and /dev/null differ diff --git a/images/binlog-row copy.png b/images/binlog-row copy.png deleted file mode 100644 index 741b6096..00000000 Binary files a/images/binlog-row copy.png and /dev/null differ diff --git a/images/cloud-overview copy.png b/images/cloud-overview copy.png deleted file mode 100644 index 260efbdd..00000000 Binary files a/images/cloud-overview copy.png and /dev/null differ diff --git a/images/confluent-consumption copy.png b/images/confluent-consumption copy.png deleted file mode 100644 index 65c6c266..00000000 Binary files a/images/confluent-consumption copy.png and /dev/null differ diff --git a/images/connection-details copy.png b/images/connection-details copy.png deleted file mode 100644 index 05594e43..00000000 Binary files a/images/connection-details copy.png and /dev/null differ diff --git a/images/connectivity copy.png b/images/connectivity copy.png deleted file mode 100644 index 91bd7593..00000000 Binary files a/images/connectivity copy.png and /dev/null differ diff --git a/images/create-cluster copy.png b/images/create-cluster copy.png deleted file mode 100644 index e4dad6ff..00000000 Binary files a/images/create-cluster copy.png and /dev/null differ diff --git a/images/non-shared-source.png b/images/non-shared-source.png new file mode 100644 index 00000000..f86ee718 Binary files /dev/null and 
b/images/non-shared-source.png differ diff --git a/images/shared-source.png b/images/shared-source.png new file mode 100644 index 00000000..89674948 Binary files /dev/null and b/images/shared-source.png differ diff --git a/images/solace-pic1.png b/images/solace-pic1.png new file mode 100644 index 00000000..426f111a Binary files /dev/null and b/images/solace-pic1.png differ diff --git a/images/solace-pic2.png b/images/solace-pic2.png new file mode 100644 index 00000000..276a8369 Binary files /dev/null and b/images/solace-pic2.png differ diff --git a/images/table-with-connectors.png b/images/table-with-connectors.png new file mode 100644 index 00000000..731c97a0 Binary files /dev/null and b/images/table-with-connectors.png differ diff --git a/ingestion/format-and-encode-parameters.mdx b/ingestion/format-and-encode-parameters.mdx index 1eb89e1b..f87b46de 100644 --- a/ingestion/format-and-encode-parameters.mdx +++ b/ingestion/format-and-encode-parameters.mdx @@ -32,9 +32,8 @@ The `ENCODE` parameter represents the data encoding and includes the following o * `CSV`: Data serialized in CSV format in the message queue, compatible with `FORMAT PLAIN`. * `Bytes`: Data exists in the message queue in raw bytes format, compatible with `FORMAT PLAIN`. - -**NOTE** + +We support `FORMAT UPSERT ENCODE PROTOBUF` but DON'T RECOMMEND using it, because this may disrupt the order of upserts. For more details, see the [documentation of Confluent](https://docs.confluent.io/platform/7.6/control-center/topics/schema.html#c3-schemas-best-practices-key-value-pairs). -* We support `FORMAT UPSERT ENCODE PROTOBUF` but DON'T RECOMMEND using it, because this may disrupt the order of upserts. For more details, see the [documentation of Confluent](https://docs.confluent.io/platform/7.6/control-center/topics/schema.html#c3-schemas-best-practices-key-value-pairs). -* Please distinguish between the parameters set in the FORMAT and ENCODE options and those set in the WITH clause. Ensure that you place them correctly and avoid any misuse. - +Please distinguish between the parameters set in the FORMAT and ENCODE options and those set in the WITH clause. Ensure that you place them correctly and avoid any misuse. + diff --git a/ingestion/generate-test-data.mdx b/ingestion/generate-test-data.mdx index 510fe1de..5c8b70be 100644 --- a/ingestion/generate-test-data.mdx +++ b/ingestion/generate-test-data.mdx @@ -23,7 +23,7 @@ WITH ( The following table shows the data types that can be generated for each load generator type. | Generator \\ Data | Number | Timestamp | Timestamptz | Varchar | Struct | Array | -| ----------------- | ------ | --------- | ----------- | ------- | ------ | ----- | +| :---------------- | :----- | :-------- | :---------- | :------ | :----- | :---- | | **Sequence** | | | | | | | | **Random** | | | | | | | @@ -37,11 +37,11 @@ The sequence load generator can generate numbers, incremented by 1, from the sta Specify the following fields for every column. -| column\_parameter | Description | Value | Required? | -| ----------------- | ------------------------------------------------------ | ---------------------------------------------- | -------------------- | -| kind | Generator type | Set to sequence. | FalseDefault: random | -| start | Starting numberMust be smaller than the ending number. | Any number of the column data typeExample: 50 | FalseDefault: 0 | -| end | Ending numberMust be larger than the starting number. 
| Any number of the column data typeExample: 100 | FalseDefault: 32767 | +| column\_parameter | Description | Value | Required? | +| :---------------- | :---------------- | :-------------------- | :------------------- | +| kind | Generator type. | Set to `sequence`. | False. Default: `random` | +| start | Starting number must be smaller than the ending number. | Any number of the column data type. Example: `50` | False. Default: `0` | +| end | Ending number must be larger than the starting number. | Any number of the column data type. Example: `100` | False. Default: `32767` | @@ -49,12 +49,12 @@ The random number generator produces random numbers within a certain range. Specify the following fields for every column in the source you are creating. -| column\_parameter | Description | Value | Required? | -| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- | --------------------------------------------------------------------- | -| kind | Generator type | Set to random. | FalseDefault: random | -| min | The minimum number can be generated.Must be smaller than the maximum number. | Any number of the column data typeExample: 50 | FalseDefault: 0 | -| max | The maximum number can be generated.Must be larger than the minimum number. | Any number of the column data typeExample: 100 | FalseDefault: 32767 | -| seed | A seed number that initializes the random load generator. The sequence of the generated numbers is determined by the seed value. If given the same seed number, the generator will produce the same sequence of numbers. | A positive integerExample: 3 | FalseIf not specified, a fixed sequence of numbers will be generated. | +| column\_parameter | Description | Value | Required? | +| :---------------- | :---------------------- | :----------------- | :--------------------- | +| kind | Generator type. | Set to random. | False. Default: `random`| +| min | The minimum number can be generated. Must be smaller than the maximum number. | Any number of the column data type. Example: `50` | False. Default: `0` | +| max | The maximum number can be generated. Must be larger than the minimum number. | Any number of the column data type. Example: `100` | False. Default: `32767` | +| seed | A seed number that initializes the random load generator. The sequence of the generated numbers is determined by the seed value. If given the same seed number, the generator will produce the same sequence of numbers. | A positive integer. Example: `3` | False. If not specified, a fixed sequence of numbers will be generated. | @@ -65,13 +65,13 @@ The random timestamp and timestamptz generator produces random timestamps and ti Specify the following fields for every column in the source you are creating. -| column\_parameter | Description | Value | Required? 
| -| ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------- | -| kind | Generator type | Set to random. | FalseDefault: random | -| max\_past | Specify the maximum deviation from the baseline timestamp or timestamptz to determine the earliest possible timestamp or timestamptz that can be generated. | An [interval](/docs/current/sql-data-types/)Example: 2h 37min | FalseDefault: 1 day | -| max\_past\_mode | Specify the baseline timestamp or timestamptz. The range for generated timestamps or timestamptzs is \[base time - max\_past , base time\] | absolute — The base time is set to the execution time of the generator. The base time is fixed for each generation.relative — The base time is the system time obtained each time a new record is generated. | FalseDefault: absolute | -| basetime | If set, the generator will ignore max\_past\_mode and use the specified time as the base time. | A [date and time string](https://docs.rs/chrono/latest/chrono/struct.DateTime.html#method.parse%5Ffrom%5Frfc3339)Example: 2023-04-01T16:39:57-08:00 | FalseDefault: generator execution time | -| seed | A seed number that initializes the random load generator. The sequence of the generated timestamps or timestamptzs is determined by the seed value. If given the same seed number, the generator will produce the same sequence of timestamps or timestamptzs. | A positive integerExample: 3 | FalseIf not specified, a fixed sequence of timestamps or timestamptzs will be generated (if the system time is constant). | +| column\_parameter | Description | Value | Required? | +| :---------------- | :--------------- | :------------- | :----------------- | +| kind | Generator type. | Set to `random`. | False. Default: `random` | +| max\_past | Specify the maximum deviation from the baseline timestamp or timestamptz to determine the earliest possible timestamp or timestamptz that can be generated. | An [interval](/docs/current/sql-data-types/). Example: `2h 37min` | False. Default: `1 day` | +| max\_past\_mode | Specify the baseline timestamp or timestamptz. The range for generated timestamps or timestamptzs is \[base time - `max_past`, base time\] | `absolute` — The base time is set to the execution time of the generator. The base time is fixed for each generation. `relative` — The base time is the system time obtained each time a new record is generated. | False. Default: `absolute` | +| basetime | If set, the generator will ignore max\_past\_mode and use the specified time as the base time. | A [date and time string](https://docs.rs/chrono/latest/chrono/struct.DateTime.html#method.parse%5Ffrom%5Frfc3339). Example: `2023-04-01T16:39:57-08:00` | False. Default: generator execution time | +| seed | A seed number that initializes the random load generator. The sequence of the generated timestamps or timestamptzs is determined by the seed value. If given the same seed number, the generator will produce the same sequence of timestamps or timestamptzs. | A positive integer. Example: `3` | False. 
If not specified, a fixed sequence of timestamps or timestamptzs will be generated (if the system time is constant). | @@ -81,11 +81,11 @@ The random varchar generator produces random combination of uppercase and lowerc Specify the following fields for every column in the source you are creating. -| column\_parameter | Description | Value | Required? | -| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------- | ------------------------------------------------------------------------ | -| kind | Generator type | Set to random. | FalseDefault: random | -| length | The length of the varchar to be generated. | A positive integerExample: 16 | FalseDefault: 10 | -| seed | A seed number that initializes the random load generator. The sequence of the generated characters is determined by the seed value. If given the same seed number, the generator will produce the same sequence of characters. | A positive integerExample: 3 | FalseIf not specified, a fixed sequence of characters will be generated. | +| column\_parameter | Description | Value | Required? | +| :---------------- | :---------------- | :---------------- | :---------------- | +| kind | Generator type. | Set to `random`. | False. Default: `random` | +| length | The length of the varchar to be generated. | A positive integer. Example: `16` | False. Default: 10 | +| seed | A seed number that initializes the random load generator. The sequence of the generated characters is determined by the seed value. If given the same seed number, the generator will produce the same sequence of characters. | A positive integer. Example: `3` | False. If not specified, a fixed sequence of characters will be generated. | @@ -107,10 +107,9 @@ WITH ( ``` -**INFO** +You need to configure each nested column in the struct. Select other tabs according to the data type of the nested columns for information on column parameters. -* You need to configure each nested column in the struct. Select other tabs according to the data type of the nested columns for information on column parameters. -* When you configure a nested column, use `column.nested_column` to specify it. For example, `v1.v2` and `v1.v3` in the `WITH` clause above. +When you configure a nested column, use `column.nested_column` to specify it. For example, `v1.v2` and `v1.v3` in the `WITH` clause above. @@ -132,10 +131,9 @@ WITH ( ``` -**INFO** +You need to specify the number of elements in the array in the `WITH` clause. `fields.c1.length = '3'` in the example above means that `c1` is an array of three elements. -* You need to specify the number of elements in the array in the `WITH` clause. `fields.c1.length = '3'` in the example above means that `c1` is an array of three elements. -* When you configure the elements in an array, use `column._` to specify them. For example, `c1._` in the `WITH` clause above. +When you configure the elements in an array, use `column._` to specify them. For example, `c1._` in the `WITH` clause above. Select other tabs according to the data type of the array for information on column parameters. 
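For illustration, here is a minimal sketch that combines the generator types described above: a `sequence` number column plus a `random` varchar column. The table and column names are hypothetical, and the statement assumes the datagen connector's generic `CREATE TABLE ... WITH (...)` syntax shown earlier on this page; only the `fields.<column>.<parameter>` options listed in the tables above are used.

```sql
CREATE TABLE datagen_demo (i1 int, v1 varchar)
WITH (
    connector = 'datagen',
    -- Sequence generator: emits 1, 2, ..., 1000 for i1.
    fields.i1.kind = 'sequence',
    fields.i1.start = '1',
    fields.i1.end = '1000',
    -- Random varchar generator: 16-character strings, reproducible via the seed.
    fields.v1.kind = 'random',
    fields.v1.length = '16',
    fields.v1.seed = '3'
) FORMAT PLAIN ENCODE JSON;
```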
diff --git a/ingestion/ingest-additional-fields-with-include-clause.mdx b/ingestion/ingest-additional-fields-with-include-clause.mdx index 099e74e1..a7891ac2 100644 --- a/ingestion/ingest-additional-fields-with-include-clause.mdx +++ b/ingestion/ingest-additional-fields-with-include-clause.mdx @@ -36,13 +36,13 @@ The `INCLUDE` clause can be used with the following source connectors. When ingesting data from Kafka, the following additional fields can be included. -| Allowed Components | Default Type | Note | -| ------------------ | ---------------------------------------- | ---------------------------------------------- | -| key | bytea | Can be overwritten by ENCODE and KEY ENCODE. | -| timestamp | timestamp with time zone (i64 in millis) | Refer to CreateTime rather than LogAppendTime. | -| partition | varchar | The partition the message is from. | -| offset | varchar | The offset in the partition. | -| headers | struct\\[\] | Key-value pairs along with the message. | +| Allowed components | Default type | Note | +| :----------------- | :--------------------------------------- | :--------------------------------------------- | +| key | `bytea` | Can be overwritten by `ENCODE` and `KEY ENCODE`. | +| timestamp | `timestamp with time zone` (i64 in millis) | Refer to `CreateTime` rather than `LogAppendTime`. | +| partition | `varchar` | The partition the message is from. | +| offset | `varchar` | The offset in the partition. | +| headers | `struct[]` | Key-value pairs along with the message. | In the case of headers, there are two ways to define it. @@ -62,9 +62,9 @@ INCLUDE header 'header_col' [AS kafka_header] When ingesting data from Kinesis, here are some things to note when including the following fields. -| Allowed Components | Default Type | Note | -| ------------------ | ------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| key | bytea | Can be overwritten by encode and key encode. | +| Allowed components | Default type | Note | +| :----------------- | :---------------- | :-------------- | +| key | bytea | Can be overwritten by encode and key encode. | | timestamp | timestamp with time zone | See the approximate\_arrival\_timestamp field at [Struct aws\_sdk\_kinesis::types::Record](https://docs.rs/aws-sdk-kinesis/latest/aws%5Fsdk%5Fkinesis/types/struct.Record.html). | | partition | varchar | The partition the message is from. | | offset | varchar | The offset in the partition, which corresponds to Kinesis sequence numbers. | @@ -75,8 +75,8 @@ For more components, see [Struct aws\_sdk\_kinesis::types::Record](https://docs. When ingesting data from Pulsar, here are some things to note when including the following fields. -| Allowed Components | Default Type | Note | -| ------------------ | ------------ | -------------------------------------------- | +| Allowed components | Default type | Note | +| :----------------- | :----------- | :------------------------------------------- | | key | bytea | Can be overwritten by ENCODE and KEY ENCODE. | | partition | varchar | The partition the message is from. | | offset | varchar | The offset in the partition. | @@ -87,11 +87,19 @@ For more components, see [Struct pulsar::message::proto::MessageMetadata](https: When ingesting data from AWS S3 or GCS, the following additional fields can be included. 
-| Allowed Components | Default Type | Note | -| ------------------ | ------------ | ---------------------------- | +| Allowed components | Default type | Note | +| :----------------- | :----------- | :--------------------------- | | file | varchar | The file the record is from. | | offset | varchar | The offset in the file. | +### MQTT + +When ingesting data from MQTT, the following additional fields can be included. + +| Allowed components | Default type | Note | +| :------------------| :------------| :---------------------------------| +| partition | varchar | The topic the record is from. | + ## Examples Here we create a table, `additional_columns`, that ingests data from a Kafka broker. Aside from the `a` column, which is part of the message payload, the additional fields `key`, `partition`, `offset`, `timestamp`, and `header`, are also added to the table. diff --git a/ingestion/modify-source-or-table-schemas.mdx b/ingestion/modify-source-or-table-schemas.mdx index fb38be7e..ffcb5831 100644 --- a/ingestion/modify-source-or-table-schemas.mdx +++ b/ingestion/modify-source-or-table-schemas.mdx @@ -23,11 +23,9 @@ ALTER TABLE ADD COLUMN ; For details about these two commands, see [ALTER SOURCE](/docs/current/sql-alter-source/) and [ALTER TABLE](/docs/current/sql-alter-table/). - -**NOTE** - + Note that you cannot add a primary key column to a source or table in RisingWave. To modify the primary key of a source or table, you need to recreate the table. - + When you add a column to a source or table, the new column is not automatically picked up in a downstream materialized view. @@ -93,11 +91,9 @@ ALTER SOURCE src_user FORMAT PLAIN ENCODE PROTOBUF( ); ``` - -**NOTE** - + Currently, it is not supported to modify the `data_format` and `data_encode`. Furthermore, when refreshing the schema registry of a source, it is not allowed to drop columns or change types. - + In addition, when the [FORMAT and ENCODE options](/docs/current/formats-and-encode-parameters/) are not changed, the `REFRESH SCHEMA` clause of `ALTER SOURCE` can also be used to refresh the schema of a source. @@ -142,11 +138,9 @@ Refresh schema of table ALTER TABLE src_user REFRESH SCHEMA; ``` - -**NOTE** - + If a downstream fragment references a column that is either missing or has undergone a type change in the updated schema, the command will be declined. - + ## See also diff --git a/ingestion/supported-sources-and-formats.mdx b/ingestion/supported-sources-and-formats.mdx index 2a42ae9e..f51e89ea 100644 --- a/ingestion/supported-sources-and-formats.mdx +++ b/ingestion/supported-sources-and-formats.mdx @@ -6,14 +6,12 @@ title: "Supported sources and formats" Below is the complete list of connectors supported by RisingWave. Click a connector name to see the SQL syntax, options, and sample statement of connecting RisingWave to the connector. - -**NOTE** - + To ingest data in formats marked with "T", you need to create tables (with connector settings). Otherwise, you can create either sources or tables (with connector settings). 
- + -| Connector | Version | Format | -| ----------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Connector | Version | Format | +| :------------ | :------------ | :------------------- | | [Kafka](/docs/current/ingest-from-kafka/) | 3.1.0 or later versions | [Avro](#avro), [JSON](#json), [protobuf](#protobuf), [Debezium JSON](#debezium-json) (T), [Debezium AVRO](#debezium-avro) (T), [DEBEZIUM\_MONGO\_JSON](#debezium-mongo-json) (T), [Maxwell JSON](#maxwell-json) (T), [Canal JSON](#canal-json) (T), [Upsert JSON](#upsert-json) (T), [Upsert AVRO](#upsert-avro) (T), [Bytes](#bytes) | | [Redpanda](/docs/current/ingest-from-redpanda/) | Latest | [Avro](#avro), [JSON](#json), [protobuf](#protobuf) | | [Pulsar](/docs/current/ingest-from-pulsar/) | 2.8.0 or later versions | [Avro](#avro), [JSON](#json), [protobuf](#protobuf), [Debezium JSON](#debezium-json) (T), [Maxwell JSON](#maxwell-json) (T), [Canal JSON](#canal-json) (T) | @@ -26,51 +24,40 @@ To ingest data in formats marked with "T", you need to create tables (with conne | [Google Pub/Sub](/docs/current/ingest-from-google-pubsub/) | [Avro](#avro), [JSON](#json), [protobuf](#protobuf), [Debezium JSON](#debezium-json) (T), [Maxwell JSON](#maxwell-json) (T), [Canal JSON](#canal-json) (T) | | | [Google Cloud Storage](/docs/current/ingest-from-gcs/) | [JSON](#json) | | - -**NOTE** - + When a source is created, RisingWave does not ingest data immediately. RisingWave starts to process data when a materialized view is created based on the source. - + ## Supported formats When creating a source, you need to specify the data and encoding formats in the `FORMAT` and `ENCODE` section of the `CREATE SOURCE` or `CREATE TABLE` statement. Below is the complete list of the supported formats in RisingWave. - -**PUBLIC PREVIEW** - -`schema.registry.name.strategy` is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). - - ### Avro For data in Avro format, you must specify a message and a schema registry. For Kafka data in Avro, you need to provide a Confluent Schema Registry that RisingWave can get the schema from. For more details about using Schema Registry for Kafka data, see [Read schema from Schema Registry](/docs/current/ingest-from-kafka/#read-schemas-from-schema-registry). `schema.registry` can accept multiple addresses. RisingWave will send requests to all URLs and return the first successful result. -Optionally, you can define a `schema.registry.name.strategy` if `schema.registry` is set. Accepted options include `topic_name_strategy`, `record_name_strategy`, and `topic_record_name_strategy`. If either `record_name_strategy` or `topic_record_name_strategy` is used, the `message` field must also be defined. 
For additional details on name strategy, see [Subject name strategy](https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#subject-name-strategy). - Please be aware that: * For Avro data, you cannot specify the schema in the `schema_definition` section of a `CREATE SOURCE` or `CREATE TABLE` statement. * The timestamp displayed in RisingWave may be different from the upstream system as timezone information is lost in Avro serialization. +* RisingWave takes [`TopicNameStrategy`](https://developer.confluent.io/courses/schema-registry/schema-subjects/#topicnamestrategy) as the default subject name strategy for the schema registry and looks for the schema with the subject name `{ topic name }-value`. Syntax: -```js +```sql FORMAT PLAIN ENCODE AVRO ( - schema.registry = 'schema_registry_url [, ...]', - [schema.registry.name.strategy = 'topic_name_strategy'], - [message = 'main_message'], + schema.registry = 'schema_registry_url [, ...]', ) ``` You can ingest Avro map type into RisingWave [map type](/docs/current/data-type-map/) or jsonb: -```js +```sql FORMAT [ DEBEZIUM | UPSERT | PLAIN ] ENCODE AVRO ( - map.handling.mode = 'map' | 'jsonb' + map.handling.mode = 'map' | 'jsonb' ) ``` @@ -82,20 +69,16 @@ When creating a source from streams in with Debezium AVRO, the schema of the sou `schema.registry` can accept multiple addresses. RisingWave will send requests to all URLs and return the first successful result. -Optionally, you can define a `schema.registry.name.strategy` if `schema.registry` is set. Accepted options include `topic_name_strategy`, `record_name_strategy`, and `topic_record_name_strategy`. If either `record_name_strategy` or `topic_record_name_strategy` is used, the `key.message` field must also be defined. For additional details on name strategy, see the [Subject name strategy](https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#subject-name-strategy) in the Confluent documentation. - `ignore_key` can be used to ignore the key part of given messages. By default, it is `false`. If set to `true`, only the payload part of the message will be consumed. In this case, the payload must not be empty and tombstone messages cannot be handled. Syntax: -```js +```sql FORMAT DEBEZIUM ENCODE AVRO ( - message = 'main_message', - schema.location = 'location' | schema.registry = 'schema_registry_url [, ...]', - [schema.registry.name.strategy = 'topic_name_strategy'], - [key.message = 'test_key'], - [ignore_key = 'true | false'] + message = 'main_message', + schema.registry = 'schema_registry_url [, ...]', + [ignore_key = 'true | false'] ) ``` @@ -105,16 +88,12 @@ When consuming data in AVRO from Kafka topics, the `FORMAT` and `ENCODE` section `schema.registry` can accept multiple addresses. RisingWave will send requests to all URLs and return the first successful result. -Optionally, you can define a `schema.registry.name.strategy` if `schema.registry` is set. Accepted options include `topic_name_strategy`, `record_name_strategy`, and `topic_record_name_strategy`. If either `record_name_strategy` or `topic_record_name_strategy` is used, the `message` field must also be defined. For additional details on name strategy, see [Subject name strategy](https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#subject-name-strategy). 
- Syntax: -```js +```sql FORMAT UPSERT ENCODE AVRO ( schema.location = 'location' | schema.registry = 'schema_registry_url [, ...]', - [schema.registry.name.strategy = 'topic_name_strategy'], - [message = 'main_message'], ) ``` @@ -126,7 +105,7 @@ RisingWave decodes JSON directly from external sources. When creating a source f Syntax: -```js +```sql FORMAT PLAIN ENCODE JSON [ ( schema.registry = 'schema_registry_url [, ...]', @@ -208,11 +187,7 @@ For data in protobuf format, you must specify a message (fully qualified by pack `schema.registry` can accept multiple addresses. RisingWave will send requests to all URLs and return the first successful result. -Optionally, you can define a `schema.registry.name.strategy` if `schema.registry` is set. Accepted options include `topic_name_strategy`, `record_name_strategy`, and `topic_record_name_strategy`. For additional details on name strategy, see [Subject name strategy](https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#subject-name-strategy). - -**INFO** - For protobuf data, you cannot specify the schema in the `schema_definition` section of a `CREATE SOURCE` or `CREATE TABLE` statement. @@ -229,7 +204,6 @@ FORMAT PLAIN ENCODE PROTOBUF ( message = 'com.example.MyMessage', schema.location = 'location' | schema.registry = 'schema_registry_url [, ...]', - [schema.registry.name.strategy = 'topic_name_strategy'], ) ``` diff --git a/integrations/destinations/amazon-dynamodb.mdx b/integrations/destinations/amazon-dynamodb.mdx index a71ad036..811f9f93 100644 --- a/integrations/destinations/amazon-dynamodb.mdx +++ b/integrations/destinations/amazon-dynamodb.mdx @@ -36,7 +36,7 @@ FORMAT data_format ENCODE data_encode [ ( ## Parameters | Field | Note | -| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :---------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------- | | table | Required. Name of the DynamoDB table where you want to write the data. | | primary\_key | Required. A pair of columns representing the partition key and sort key of DynamoDB, e.g., key1,key2, separated by comma. | | aws.region | Required. AWS region where your DynamoDB table is hosted. | @@ -46,7 +46,8 @@ FORMAT data_format ENCODE data_encode [ ( | aws.credentials.role.arn | Conditional. ARN of the IAM role to assume for accessing DynamoDB. Must be specified when using AssumeRole. | | aws.credentials.role.external\_id | Conditional. External ID for assuming the IAM role specified in aws.credentials.role.arn. | | aws.profile | Optional. The name of the AWS CLI profile to use for accessing DynamoDB. If specified, it overrides the default profile. | -| dynamodb.max\_batch\_rows | Optional. Maximum number of rows to write in a single batch operation to DynamoDB. This helps optimize throughput and manage rate limits. Default value is 1024. | +| dynamodb.max\_batch\_item\_nums | Optional. The maximum number of items in the [`BatchWriteItem`](https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html) operation. It must be larger than 1, and less than or equal to 25. The default is 25. | +| dynamodb.max\_future\_send\_nums | Optional. The maximum number of concurrent write futures in DynamoDB. 
It must be less than 360, and the default is 256. This is derived from user-defined `max_parallelism_units` (40000 by default). If the write throughput of RisingWave exceeds the `max_parallelism_units` set in DynamoDB, an error would be reported. | ## Partition key and sort key mapping @@ -68,7 +69,7 @@ This makes sure that the data structure in RisingWave aligns with the key defini ## Data type mapping | RisingWave Data Type | DynamoDB Data Type | -| --------------------------- | ------------------ | +| :-------------------------- | :----------------- | | boolean | Bool | | smallint | number (N) | | integer | number (N) | @@ -88,8 +89,6 @@ This makes sure that the data structure in RisingWave aligns with the key defini | array | list (L) | | JSONB | string (S) | - -**NOTE** - + The `struct` datatype in RisingWave will map to `map (M)` in DynamoDB in a recursive way. Refer to [source code](https://github.com/risingwavelabs/risingwave/blob/88bb14aa6eb481f1dc0e92ee190bafad089d2afd/src/connector/src/sink/dynamodb.rs#L386) for details. - + diff --git a/integrations/destinations/apache-doris.mdx b/integrations/destinations/apache-doris.mdx index fe001bf5..92d2116f 100644 --- a/integrations/destinations/apache-doris.mdx +++ b/integrations/destinations/apache-doris.mdx @@ -24,7 +24,7 @@ WITH ( ## Parameters | Parameter Names | Description | -| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | type | Required. Specify if the sink should be upsert or append-only. If creating an upsert sink, the table you are sinking to needs to have a UNIQUE KEY. | | doris.url | Required. The connection port for the frontend of Doris. This is not the MySQL connection port. | | doris.username | Required. The user name of the Doris user. | @@ -33,7 +33,7 @@ WITH ( | doris.table | Required. The Doris table you want to sink data to. | | doris.partial\_column | Optional. Defaults to false. If true, you can perform partial updates on the columns of a table, see the [Partial update](https://doris.apache.org/docs/2.0/data-operate/update/update-of-unique-model/#partial-update) in the Doris documentation for more details. | | force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. | -| primary\_key | Optional. The primary keys of the sink. Use ',' to delimit the primary key columns. | +| primary\_key | Optional. The primary keys of the sink. Use `,` to delimit the primary key columns. | ## Examples @@ -80,7 +80,7 @@ The following table shows the corresponding data types between RisingWave and Do In regards to `decimal` types, RisingWave will round to the nearest decimal place to ensure that its precision matches that of Doris. Ensure that the length of decimal types being imported into Doris does not exceed Doris's decimal length. Otherwise, it will fail to import. 
| Doris type | RisingWave type | -| --------------- | --------------------------- | +| :-------------- | :-------------------------- | | BOOLEAN | BOOLEAN | | SMALLINT | SMALLINT | | INT | INTEGER | @@ -100,9 +100,4 @@ In regards to `decimal` types, RisingWave will round to the nearest decimal plac | JSONB | JSONB | | BIGINT | SERIAL | - -**NOTE** - -Before v1.9, when inserting data into an Apache Doris sink, an error would be reported if the values were "nan (not a number)", "inf (infinity)", or "-inf (-infinity)". Since v1.9, we have made a change to the behavior. If a decimal value is out of bounds or represents "inf", "-inf", or "nan", we will insert null values. - - +If a decimal value is out of bounds or represents `inf`, `-inf`, or `nan`, RisingWave will insert null values. \ No newline at end of file diff --git a/integrations/destinations/apache-iceberg.mdx b/integrations/destinations/apache-iceberg.mdx index 6190bce0..81839525 100644 --- a/integrations/destinations/apache-iceberg.mdx +++ b/integrations/destinations/apache-iceberg.mdx @@ -28,10 +28,10 @@ WITH ( ## Parameters | Parameter Names | Description | -| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :--------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | type | Required. Allowed values: appendonly and upsert. | | force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. | -| s3.endpoint | Optional. Endpoint of the S3\. For MinIO object store backend, it should be http://${MINIO_HOST}:${MINIO_PORT}. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html) | +| s3.endpoint | Optional. Endpoint of the S3.
  • For the MinIO object store backend, it should be `http://${MINIO_HOST}:${MINIO_PORT}`.
  • For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html).
| | s3.region | Optional. The region where the S3 bucket is hosted. Either s3.endpoint or s3.region must be specified. | | s3.access.key | Required. Access key of the S3 compatible object store. | | s3.secret.key | Required. Secret key of the S3 compatible object store. | @@ -42,14 +42,14 @@ WITH ( | warehouse.path | Conditional. The path of the Iceberg warehouse. Currently, only S3-compatible object storage systems, such as AWS S3 and MinIO, are supported. It's required if the catalog.type is not rest. | | catalog.url | Conditional. The URL of the catalog. It is required when catalog.type is not storage. | | primary\_key | The primary key for an upsert sink. It is only applicable to the upsert mode. | -| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10\. The behavior of this field also depends on the sink\_decouple setting:If sink\_decouple is true (the default), the default value of commit\_checkpoint\_interval is 10. If sink\_decouple is set to false, the default value of commit\_checkpoint\_interval is 1. If sink\_decouple is set to false and commit\_checkpoint\_interval is set to larger than 1, an error will occur. | +| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10.
The behavior of this field also depends on the `sink_decouple` setting:
  • If `sink_decouple` is true (the default), the default value of `commit_checkpoint_interval` is 10.
  • If `sink_decouple` is set to false, the default value of `commit_checkpoint_interval` is 1.
  • If `sink_decouple` is set to false and `commit_checkpoint_interval` is set to a value larger than 1, an error will occur.
| ## Data type mapping RisingWave converts risingwave data types from/to Iceberg according to the following data type mapping table: | RisingWave Type | Iceberg Type | -| --------------- | ------------ | +| :-------------- | :----------- | | boolean | boolean | | int | integer | | bigint | long | @@ -167,9 +167,11 @@ with ( ### Glue catalog -Premium Edition Feature + +**PREMIUM EDITION FEATURE** This feature is only available in the premium edition of RisingWave. The premium edition offers additional advanced features and capabilities beyond the free and community editions. If you have any questions about upgrading to the premium edition, please contact our sales team at [sales@risingwave-labs.com](mailto:sales@risingwave-labs.com). + RisingWave supports the Glue catalog. You should use AWS S3 if you use the Glue catalog. Below are example codes for using this catalog: diff --git a/integrations/destinations/apache-kafka.mdx b/integrations/destinations/apache-kafka.mdx index 5d264690..6057407c 100644 --- a/integrations/destinations/apache-kafka.mdx +++ b/integrations/destinations/apache-kafka.mdx @@ -22,32 +22,30 @@ FORMAT data_format ENCODE data_encode [ ( ; ``` - -**NOTE** - + Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/). - + ## Basic parameters All `WITH` options are required unless explicitly mentioned as optional. -| Parameter or clause | Description | -| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| sink\_name | Name of the sink to be created. | -| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. | +| Parameter or clause | Description | +| :-------------------------- | :------------- | +| sink\_name | Name of the sink to be created. | +| sink\_from | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. | | AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | -| connector | Sink connector type must be 'kafka' for Kafka sink. | -| properties.bootstrap.server | Address of the Kafka broker. Format: ‘ip:port’. If there are multiple brokers, separate them with commas. | -| topic | Address of the Kafka topic. One sink can only correspond to one topic. | -| primary\_key | Conditional. The primary keys of the sink. Use ',' to delimit the primary key columns. This field is optional if creating a PLAIN sink, but required if creating a DEBEZIUM or UPSERT sink. | +| connector | Sink connector type must be `kafka` for Kafka sink. | +| properties.bootstrap.server | Address of the Kafka broker. Format: `ip:port`. If there are multiple brokers, separate them with commas. | +| topic | Address of the Kafka topic. One sink can only correspond to one topic. | +| primary\_key | Conditional. The primary keys of the sink. Use `,` to delimit the primary key columns. 
This field is optional if creating a `PLAIN` sink, but required if creating a `DEBEZIUM` or `UPSERT` sink. | ## Additional Kafka parameters When creating a Kafka sink in RisingWave, you can specify the following Kafka-specific parameters. To set the parameter, add the RisingWave equivalent of the Kafka parameter as a `WITH` option. For additional details on these parameters, see the [Configuration properties](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md). -| Kafka parameter name | RisingWave parameter name | Type | -| ------------------------------------- | ------------------------------------------------ | ------ | +| Kafka parameter name | RisingWave parameter name | Type | +| :----------------------- | :------------------------------ | :----- | | allow.auto.create.topics | properties.allow.auto.create.topics | bool | | batch.num.messages | properties.batch.num.messages | int | | batch.size | properties.batch.size | int | @@ -66,36 +64,33 @@ When creating a Kafka sink in RisingWave, you can specify the following Kafka-sp | receive.message.max.bytes | properties.receive.message.max.bytes | int | | ssl.endpoint.identification.algorithm | properties.ssl.endpoint.identification.algorithm | str | - -**NOTE** -* Set `properties.ssl.endpoint.identification.algorithm` to `none` to bypass the verification of CA certificates and resolve SSL handshake failure. This parameter can be set to either `https` or `none`. By default, it is `https`. -* Starting with version 2.0, the default value for `properties.message.timeout.ms` has changed from 5 seconds to **5 minutes**, aligning with the default setting in the [official Kafka library](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md). - +Set `properties.ssl.endpoint.identification.algorithm` to `none` to bypass the verification of CA certificates and resolve SSL handshake failure. This parameter can be set to either `https` or `none`. By default, it is `https`. + +Starting with version 2.0, the default value for `properties.message.timeout.ms` has changed from 5 seconds to **5 minutes**, aligning with the default setting in the [official Kafka library](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md). -## FORMAT and ENCODE options - -**NOTE** +## FORMAT and ENCODE options -These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause + +These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause. + - -| Field | Notes | -| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| data\_format | Data format. Allowed formats: PLAIN: Output data with insert operations. DEBEZIUM: Output change data capture (CDC) log in Debezium format. UPSERT: Output data as a changelog stream. primary\_key must be specified in this case. To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). | -| data\_encode | Data encode. Allowed encodes: JSON: Supports PLAIN JSON, UPSERT JSON and DEBEZIUM JSON sinks. 
AVRO: Supports UPSERT AVRO and PLAIN AVRO sinks. PROTOBUF: Supports PLAIN PROTOBUF and UPSERT PROTOBUF sinks. For UPSERT PROTOBUF sinks, you must specify key encode text, while it remains optional for other format/encode combinations. | -| force\_append\_only | If true, forces the sink to be PLAIN (also known as append-only), even if it cannot be. | -| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. \- If omitted, the output format of timestamptz is 2023-11-11T18:30:09.453000Z which includes the UTC suffix Z. \- When utc\_without\_suffix is specified, the format is changed to 2023-11-11 18:30:09.453000. | -| schemas.enable | Only configurable for upsert JSON sinks. By default, this value is false for upsert JSON sinks and true for debezium JSON sinks. If true, RisingWave will sink the data with the schema to the Kafka sink. Note that this is not referring to a schema registry containing a JSON schema, but rather schema formats defined using [Kafka Connect](https://www.confluent.io/blog/kafka-connect-deep-dive-converters-serialization-explained/#json-schemas). | -| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). | +| Field | Notes | +| :------------------------ | :-------------------------- | +| data\_format | Data format. Allowed formats:
  • `PLAIN`: Output data with insert operations.
  • `DEBEZIUM`: Output change data capture (CDC) log in Debezium format.
  • `UPSERT`: Output data as a changelog stream. `primary_key` must be specified in this case.
To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). | +| data\_encode | Data encode. Allowed encodes:
  • `JSON`: Supports `PLAIN JSON`, `UPSERT JSON` and `DEBEZIUM JSON` sinks.
  • `AVRO`: Supports `UPSERT AVRO` and `PLAIN AVRO` sinks.
  • `PROTOBUF`: Supports `PLAIN PROTOBUF` and `UPSERT PROTOBUF` sinks.
For `UPSERT PROTOBUF` sinks, you must specify `key encode text`, while it remains optional for other format/encode combinations. | +| force\_append\_only | If true, forces the sink to be `PLAIN` (also known as append-only), even if it cannot be. | +| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding.
  • If omitted, the output format of timestamptz is `2023-11-11T18:30:09.453000Z`, which includes the UTC suffix `Z`.
  • When `utc_without_suffix` is specified, the format is changed to `2023-11-11 18:30:09.453000`.
| +| schemas.enable | Only configurable for upsert JSON sinks. By default, this value is false for upsert JSON sinks and true for debezium JSON sinks. If true, RisingWave will sink the data with the schema to the Kafka sink. This is not referring to a schema registry containing a JSON schema, but rather schema formats defined using [Kafka Connect](https://www.confluent.io/blog/kafka-connect-deep-dive-converters-serialization-explained/#json-schemas). | +| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. | ### Avro specific parameters When creating an Avro sink, the following options can be used following `FORMAT UPSERT ENCODE AVRO` or `FORMAT PLAIN ENCODE AVRO`. | Field | Notes | -| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------ | +| :---------------------------- | :----------------------------------------------------------------------------------------------------------------------------- | | schema.registry | Required. The address of the schema registry. | | schema.registry.username | Optional. The user name used to access the schema registry. | | schema.registry.password | Optional. The password associated with the user name. | @@ -105,7 +100,7 @@ When creating an Avro sink, the following options can be used following `FORMAT Syntax: -```js +```sql FORMAT [ UPSERT | PLAIN ] ENCODE AVRO ( schema.registry = 'schema_registry_url', @@ -123,20 +118,18 @@ For data type mapping, the serial type is supported. We map the serial type to t When creating an append-only Protobuf sink, the following options can be used following `FORMAT PLAIN ENCODE PROTOBUF` or `FORMAT UPSERT ENCODE PROTOBUF`. -| Field | Notes | -| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Field | Notes | +| :---------------------------- | :----------------------- | | message | Required. Package qualified message name of the main Message in the schema definition. | -| schema.location | Required if schema.registry is not specified. Only one of schema.location or schema.registry can be defined. The schema location. This can be in either file://, http://, https:// format. | +| schema.location | Required if schema.registry is not specified. Only one of schema.location or schema.registry can be defined. The schema location. This can be in either `file://`, `http://`, `https://` format. | | schema.registry | Required if schema.location is not specified. Only one of schema.location or schema.registry can be defined. The address of the schema registry. | | schema.registry.username | Optional. The user name used to access the schema registry. | | schema.registry.password | Optional. The password associated with the user name. | | schema.registry.name.strategy | Optional. Accepted options include topic\_name\_strategy (default), record\_name\_strategy, and topic\_record\_name\_strategy. | - -**NOTE** - + The `file://` format is not recommended for production use. If it is used, it needs to be available for both meta and compute nodes. 
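As a quick illustration of how the Protobuf options above fit together, here is a minimal sketch of a sink definition that reads its schema from a schema registry rather than a `schema.location` file. The source name, topic, broker address, and message name are placeholders, not values from this guide.

```sql
CREATE SINK IF NOT EXISTS user_events_proto_sink
FROM user_events
WITH (
    connector = 'kafka',
    properties.bootstrap.server = 'broker1:9092',
    topic = 'user-events-proto'
)
FORMAT PLAIN ENCODE PROTOBUF (
    -- Package-qualified name of the main message in the schema definition.
    message = 'com.example.UserEvent',
    -- Only one of schema.location or schema.registry may be set;
    -- this sketch assumes a Confluent-compatible schema registry.
    schema.registry = 'http://schema-registry:8081'
);
```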
- + Syntax: @@ -245,7 +238,7 @@ If your Kafka sink service is located in a different VPC from RisingWave, use AW To create a Kafka sink with a PrivateLink connection, in the WITH section of your `CREATE SINK` statement, specify the following parameters. | Parameter | Notes | -| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | privatelink.targets | The PrivateLink targets that correspond to the Kafka brokers. The targets should be in JSON format. Note that each target listed corresponds to each broker specified in the properties.bootstrap.server field. If the order is incorrect, there will be connectivity issues. | | privatelink.endpoint | The DNS name of the VPC endpoint. If you're using RisingWave Cloud, you can find the auto-generated endpoint after you created a connection. See details in [Create a VPC connection](/cloud/create-a-connection/#whats-next). | | connection.name | The name of the connection, which comes from the connection created using the [CREATE CONNECTION](/docs/current/sql-create-connection/) statement. Omit this parameter if you have provisioned a VPC endpoint using privatelink.endpoint (recommended). | @@ -291,18 +284,16 @@ You need to specify encryption and authentication parameters in the WITH section To sink data encrypted with SSL without SASL authentication, specify these parameters in the WITH section of your `CREATE SINK` statement. | Parameter | Notes | -| ----------------------------------- | ----------- | +| :---------------------------------- | :---------- | | properties.security.protocol | Set to SSL. | | properties.ssl.ca.location | | | properties.ssl.certificate.location | | | properties.ssl.key.location | | | properties.ssl.key.password | | - -**NOTE** - + For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. - + Here is an example of creating a sink encrypted with SSL without using SASL authentication. @@ -323,18 +314,16 @@ FORMAT PLAIN ENCODE JSON; | Parameter | Notes | -| ---------------------------- | ---------------------------------------------------------------------------------------------- | +| :--------------------------- | :--------------------------------------------------------------------------------------------- | | properties.security.protocol | For SASL/PLAIN without SSL, set to SASL\_PLAINTEXT. For SASL/PLAIN with SSL, set to SASL\_SSL. | | properties.sasl.mechanism | Set to PLAIN. | | properties.sasl.username | | | properties.sasl.password | | - -**NOTE** - + For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. 
+ - For SASL/PLAIN with SSL, you need to include these SSL parameters: * `properties.ssl.ca.location` @@ -380,17 +369,15 @@ FORMAT PLAIN ENCODE JSON; | Parameter | Notes | -| ---------------------------- | ---------------------------------------------------------------------------------------------- | +| :--------------------------- | :--------------------------------------------------------------------------------------------- | | properties.security.protocol | For SASL/SCRAM without SSL, set to SASL\_PLAINTEXT. For SASL/SCRAM with SSL, set to SASL\_SSL. | | properties.sasl.mechanism | Set to SCRAM-SHA-256 or SCRAM-SHA-512 depending on the encryption method used. | | properties.sasl.username | | | properties.sasl.password | | - -**NOTE** - + For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. - + For SASL/SCRAM with SSL, you also need to include these SSL parameters: @@ -418,7 +405,7 @@ FORMAT PLAIN ENCODE JSON; | Parameter | Notes | -| ------------------------------------------------ | ---------------------------------------------------------------------------------- | +| :----------------------------------------------- | :--------------------------------------------------------------------------------- | | properties.security.protocol | Set to SASL\_PLAINTEXT, as RisingWave does not support using SASL/GSSAPI with SSL. | | properties.sasl.mechanism | Set to GSSAPI. | | properties.sasl.kerberos.service.name | | @@ -427,11 +414,9 @@ FORMAT PLAIN ENCODE JSON; | properties.sasl.kerberos.kinit.cmd | | | properties.sasl.kerberos.min.time.before.relogin | | - -**NOTE** - + For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. - + Here is an example of creating a sink authenticated with SASL/GSSAPI without SSL encryption. @@ -454,22 +439,19 @@ WITH ( -**CAUTION** The implementation of SASL/OAUTHBEARER in RisingWave validates only [unsecured client side tokens](https://docs.confluent.io/platform/current/kafka/authentication%5Fsasl/authentication%5Fsasl%5Foauth.html#unsecured-client-side-token-creation-options-for-sasl-oauthbearer), and does not support OpenID Connect (OIDC) authentication. Therefore, it should not be used in production environments. | Parameter | Notes | -| ---------------------------------- | ---------------------------------------------------------------------------------------------------------- | +| :--------------------------------- | :--------------------------------------------------------------------------------------------------------- | | properties.security.protocol | For SASL/OAUTHBEARER without SSL, set to SASL\_PLAINTEXT. For SASL/OAUTHBEARER with SSL, set to SASL\_SSL. | | properties.sasl.mechanism | Set to OAUTHBEARER. | | properties.sasl.oauthbearer.config | | - -**NOTE** - + For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. 
Also, due to the limitation of the SASL/OAUTHBEARER implementation, you only need to specify one OAUTHBEARER parameter: `properties.sasl.oauthbearer.config`. Other OAUTHBEARER parameters are not applicable. - + For SASL/OAUTHBEARER with SSL, you also need to include these SSL parameters: @@ -499,7 +481,7 @@ WITH ( ## Data type mapping - RisingWave and Debezium JSON | RisingWave Data Type | Schema Type in JSON | Schema Name in JSON | -| ---------------------- | ------------------- | --------------------------------------- | +| :--------------------- | :------------------ | :-------------------------------------- | | boolean | boolean | n/a | | smallint | int16 | n/a | | integer | int32 | n/a | diff --git a/integrations/destinations/apache-pulsar.mdx b/integrations/destinations/apache-pulsar.mdx index 7b7f32ab..9c58ec36 100644 --- a/integrations/destinations/apache-pulsar.mdx +++ b/integrations/destinations/apache-pulsar.mdx @@ -36,36 +36,34 @@ FORMAT data_format ENCODE data_encode [ ( ## Parameters -| Parameter Names | Description | -| ----------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Parameter names | Description | +| :---------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | topic | Required. The address of the Pulsar topic. One source can only correspond to one topic. | | service.url | Required. The address of the Pulsar service. | | auth.token | Optional. A token for auth. If both auth.token and oauth are set, only oauth authorization is considered. | | oauth.issuer.url | Optional. The issuer URL for OAuth2\. This field must be filled if other oauth fields are specified. | -| oauth.credentials.url | Optional. The path for credential files, which starts with file://. This field must be filled if other oauth fields are specified. | +| oauth.credentials.url | Optional. The path for credential files, which starts with `file://`. This field must be filled if other oauth fields are specified. | | oauth.audience | Optional. The audience for OAuth2\. This field must be filled if other oauth fields are specified. | | oauth.scope | Optional. The scope for OAuth2. | | aws.credentials.access\_key\_id | Optional. The AWS access key for loading from S3\. This field does not need to be filled if oauth.credentials.url is specified to a local path. | | aws.credentials.secret\_access\_key | Optional. The AWS secret access key for loading from S3\. This field does not need to be filled if oauth.credentials.url is specified to a local path. | | max\_retry\_num | Optional. The maximum number of times to retry sending a batch to Pulsar. This allows retrying in case of transient errors. The default value is 3. | | retry\_interval | Optional. The time in milliseconds to wait after a failure before retrying to send a batch. The default value is 100ms. | -| primary\_key | Optional. The primary keys of the sink. Use ',' to delimit the primary key columns. Primary keys are optional when creating a PLAIN sink but required for UPSERT and DEBEZIUM sinks. | +| primary\_key | Optional. The primary keys of the sink. Use `,` to delimit the primary key columns. Primary keys are optional when creating a PLAIN sink but required for UPSERT and DEBEZIUM sinks. 
| ## FORMAT and ENCODE options - -**NOTE** - + These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause - + | Field | Notes | -| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| data\_format | Data format. Allowed formats: PLAIN: Output data with insert operations. DEBEZIUM: Output change data capture (CDC) log in Debezium format. UPSERT: Output data as a changelog stream. primary\_key must be specified in this case. To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). | +| :------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| data\_format | Data format. Allowed formats:
  • `PLAIN`: Output data with insert operations.
  • `DEBEZIUM`: Output change data capture (CDC) log in Debezium format.
  • `UPSERT`: Output data as a changelog stream. `primary_key` must be specified in this case.
To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). | | data\_encode | Data encode. Supported encode: JSON. | | force\_append\_only | If true, forces the sink to be PLAIN (also known as append-only), even if it cannot be. | -| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. \- If omitted, the output format of timestamptz is 2023-11-11T18:30:09.453000Z which includes the UTC suffix Z. \- When utc\_without\_suffix is specified, the format is changed to 2023-11-11 18:30:09.453000. | -| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). | +| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding.
  • If omitted, the output format of timestamptz is `2023-11-11T18:30:09.453000Z`, which includes the UTC suffix `Z`.
  • When `utc_without_suffix` is specified, the format is changed to `2023-11-11 18:30:09.453000`.
| +| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. | ## Example diff --git a/integrations/destinations/aws-kinesis.mdx b/integrations/destinations/aws-kinesis.mdx index 521343a7..6d5eacb1 100644 --- a/integrations/destinations/aws-kinesis.mdx +++ b/integrations/destinations/aws-kinesis.mdx @@ -28,8 +28,8 @@ FORMAT data_format ENCODE data_encode [ ( ## Basic parameters -| Field | Notes | -| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Field | Notes | +| :---------------------------------- | :--------------------- | | stream | Required. Name of the stream. | | aws.region | Required. AWS service region. For example, US East (N. Virginia). | | endpoint | Optional. URL of the entry point for the AWS Kinesis service. | @@ -38,29 +38,24 @@ FORMAT data_format ENCODE data_encode [ ( | aws.credentials.session\_token | Optional. The session token associated with the temporary security credentials. | | aws.credentials.role.arn | Optional. The Amazon Resource Name (ARN) of the role to assume. | | aws.credentials.role.external\_id | Optional. The [external id](https://aws.amazon.com/blogs/security/how-to-use-external-id-when-granting-access-to-your-aws-resources/) used to authorize access to third-party resources. | -| primary\_key | Required. The primary keys of the sink. Use ',' to delimit the primary key columns. | - - -**NOTE** +| primary\_key | Required. The primary keys of the sink. Use `,` to delimit the primary key columns. | In the Kinesis sink, we use [PutRecords](https://docs.aws.amazon.com/kinesis/latest/APIReference/API%5FPutRecords.html) API to send multiple records in batches to achieve higher throughput. Due to the limitations of Kinesis, records might be out of order when using this API. Nevertheless, the current implementation of the Kinesis sink guarantees at-least-once delivery and eventual consistency. - -## FORMAT and ENCODE options - -**NOTE** +## FORMAT and ENCODE options + These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause - + -| Field | Notes | -| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| data\_format | Data format. Allowed formats: PLAIN: Output data with insert operations. DEBEZIUM: Output change data capture (CDC) log in Debezium format. UPSERT: Output data as a changelog stream. primary\_key must be specified in this case. To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). | -| data\_encode | Data encode. Supported encode: JSON. | -| force\_append\_only | If true, forces the sink to be PLAIN (also known as append-only), even if it cannot be. | -| timestamptz.handling.mode | Controls the timestamptz output format. 
This parameter specifically applies to append-only or upsert sinks using JSON encoding. \- If omitted, the output format of timestamptz is 2023-11-11T18:30:09.453000Z which includes the UTC suffix Z. \- When utc\_without\_suffix is specified, the format is changed to 2023-11-11 18:30:09.453000. | -| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). | +| Field | Notes | +| :---------------------------------- | :--------------------- | +| data\_format | Data format. Allowed formats:
  • `PLAIN`: Output data with insert operations.
  • `DEBEZIUM`: Output change data capture (CDC) log in Debezium format.
  • `UPSERT`: Output data as a changelog stream. `primary_key` must be specified in this case.
To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). | +| data\_encode | Data encode. Supported encode: `JSON`. | +| force\_append\_only | If `true`, forces the sink to be `PLAIN` (also known as `append-only`), even if it cannot be. | +| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding.
  • If omitted, the output format of timestamptz is `2023-11-11T18:30:09.453000Z`, which includes the UTC suffix `Z`.
  • When `utc_without_suffix` is specified, the format is changed to `2023-11-11 18:30:09.453000`.
| +| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. | ## Examples diff --git a/integrations/destinations/aws-s3.mdx b/integrations/destinations/aws-s3.mdx new file mode 100644 index 00000000..6d9fabea --- /dev/null +++ b/integrations/destinations/aws-s3.mdx @@ -0,0 +1,51 @@ +--- + title: Sink data to AWS S3 + sidebarTitle: AWS S3 + description: This guide describes how to sink data from RisingWave to Amazon S3 sink using S3 connector in RisingWave. +--- + +[Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html) is an object storage service that offers industry-leading scalability, data availability, security, and performance. + +## Syntax + +```sql +CREATE SINK [ IF NOT EXISTS ] sink_name +[FROM sink_from | AS select_query] +WITH ( + connector='s3', + connector_parameter = 'value', ... +); +``` + +## Parameters + +| Parameter names | Description | +|-|-| +| connector | Required. Support the S3 connector only.| +| s3.region_name | Required. The service region. | +| s3.bucket_name | Required. The name of the bucket where the sink data is stored in. | +| s3.path | Required. The directory where the sink file is located.| +| s3.credentials.access | Optional. The access key ID of AWS. | +| s3.credentials.secret | Optional. The secret access key of AWS. | +| s3.endpoint_url | Optional. The host URL for an S3-compatible object storage server. This allows users to use a different server instead of the standard S3 server.| +| s3.assume_role | Optional. Specifies the ARN of an IAM role to assume when accessing S3. It allows temporary, secure access to S3 resources without sharing long-term credentials. | +| type | Required. Defines the type of the sink. Options include `append-only` or `upsert`.| + +## Example + +```sql +CREATE SINK s3_sink AS SELECT v1 +FROM t +WITH ( + connector='s3', + s3.path = '', + s3.region_name = '', + s3.bucket_name = '', + s3.credentials.account_name = '', + s3.credentials.account_key = '', + s3.endpoint_url = '', + type = 'append-only', +)FORMAT PLAIN ENCODE PARQUET(force_append_only=true); +``` + +For more information about encode `Parquet` or `JSON`, see [Sink data in parquet or json encode](/docs/current/data-delivery/). \ No newline at end of file diff --git a/integrations/destinations/azure-blob.mdx b/integrations/destinations/azure-blob.mdx index 1b16df4b..440620f9 100644 --- a/integrations/destinations/azure-blob.mdx +++ b/integrations/destinations/azure-blob.mdx @@ -28,7 +28,7 @@ WITH ( ## Parameters | Parameter names | Description | -| -------------------------------- | ------------------------------------------------------------------------------ | +| :------------------------------- | :----------------------------------------------------------------------------- | | azblob.container\_name | Required. The name of the Azure Blob Storage container. | | azblob.path | Required. The directory where the sink file is located. | | azblob.credentials.account\_name | Optional. The Azure Storage account name for authentication. | @@ -52,4 +52,4 @@ WITH ( )FORMAT PLAIN ENCODE PARQUET(force_append_only=true); ``` -For more information about encode `Parquet`, see [Sink data in parquet format](/docs/current/data-delivery/#sink-data-in-parquet-format). 
+For more information about encode `Parquet` or `JSON`, see [Sink data in parquet or json encode](/docs/current/data-delivery/). diff --git a/integrations/destinations/bigquery.mdx b/integrations/destinations/bigquery.mdx index 12321794..73d88f6e 100644 --- a/integrations/destinations/bigquery.mdx +++ b/integrations/destinations/bigquery.mdx @@ -41,14 +41,14 @@ WITH ( ## Parameters | Parameter Names | Description | -| ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :---------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _sink\_name_ | Name of the sink to be created. | -| _sink\_from_ | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or _select\_query_ query must be specified. | -| AS _select\_query_ | A SELECT query that specifies the data to be output to the sink. Either this query or a _sink\_from_ clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | -| type | Required. Data format. Allowed formats: append-only: Output data with insert operations.upsert: For this type, you need to set corresponding permissions and primary keys based on the [Document of BigQuery](https://cloud.google.com/bigquery/docs/change-data-capture). | +| _sink\_from_ | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or `select_query` query must be specified. | +| AS _select\_query_ | A SELECT query that specifies the data to be output to the sink. Either this query or a `sink_from` clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | +| type | Required. Data format. Allowed formats:
  • `append-only`: Output data with insert operations.
  • `upsert`: For this type, you need to set the corresponding permissions and primary keys as described in the [BigQuery change data capture documentation](https://cloud.google.com/bigquery/docs/change-data-capture).
| | force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. | -| bigquery.local.path | Optional. The file path leading to the JSON key file located in your local server. Details can be found in [Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) under your Google Cloud account. Either bigquery.local.path or bigquery.s3.path must be specified. | -| bigquery.s3.path | Optional. The file path leading to the JSON key file located in S3\. Details can be found in [Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) under your Google Cloud account. At least one of bigquery.local.path or bigquery.s3.path must be specified. | +| bigquery.local.path | Optional. The file path leading to the JSON key file located in your local server. Details can be found in [Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) under your Google Cloud account. Either `bigquery.local.path` or `bigquery.s3.path` must be specified. | +| bigquery.s3.path | Optional. The file path leading to the JSON key file located in S3\. Details can be found in [Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) under your Google Cloud account. At least one of `bigquery.local.path` or `bigquery.s3.path` must be specified. | | bigquery.project | Required. The BigQuery project ID. | | bigquery.dataset | Required. The BigQuery dataset ID. | | bigquery.table | Required. The BigQuery table you want to sink to. | @@ -98,7 +98,7 @@ WITH ( ## Data type mapping | RisingWave Data Type | BigQuery Data Type | -| --------------------------- | ------------------ | +| :-------------------------- | :----------------- | | boolean | bool | | smallint | int64 | | integer | int64 | diff --git a/integrations/destinations/cassandra-or-scylladb.mdx b/integrations/destinations/cassandra-or-scylladb.mdx index 2f9d9da3..b3506b7e 100644 --- a/integrations/destinations/cassandra-or-scylladb.mdx +++ b/integrations/destinations/cassandra-or-scylladb.mdx @@ -39,10 +39,10 @@ Once the sink is created, data changes will be streamed to the specified table. ## Parameters | Parameter Names | Description | -| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | _sink\_name_ | Name of the sink to be created. | -| _sink\_from_ | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or _select\_query_ query must be specified. | -| AS _select\_query_ | A SELECT query that specifies the data to be output to the sink. Either this query or a _sink\_from_ clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | +| _sink\_from_ | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or `select_query` query must be specified. | +| AS _select\_query_ | A SELECT query that specifies the data to be output to the sink. 
Either this query or a `sink_from` clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | | type | Required. Specify if the sink should be upsert or append-only. If creating an upsert sink, you must specify a primary key. | | primary\_key | Optional. A string of a list of column names, separated by commas, that specifies the primary key of the Cassandra sink. | | force\_append\_only | If true, forces the sink to be append-only, even if it cannot be. | @@ -58,7 +58,7 @@ Once the sink is created, data changes will be streamed to the specified table. ## Data type mapping - RisingWave and Cassandra | RisingWave Data Type | Cassandra Data Type | -| --------------------------- | --------------------------------------------------------------------------------------- | +| :-------------------------- | :-------------------------------------------------------------------------------------- | | boolean | boolean | | smallint | smallint | | integer | int | diff --git a/integrations/destinations/clickhouse.mdx b/integrations/destinations/clickhouse.mdx index 1ba3a2bd..d72fa9d4 100644 --- a/integrations/destinations/clickhouse.mdx +++ b/integrations/destinations/clickhouse.mdx @@ -11,11 +11,9 @@ ClickHouse is a high-performance, column-oriented SQL database management system * Ensure you already have a ClickHouse table that you can sink data to. For additional guidance on creating a table and setting up ClickHouse, refer to this [quick start guide](https://clickhouse.com/docs/en/getting-started/quick-start). * Ensure you have an upstream materialized view or source that you can sink data from. - -**NOTE** - + We highly recommend using the deduplication engine, like ReplacingMergeTree, in ClickHouse. This is because it addresses the potential problem of duplicate writes in ClickHouse during RisingWave recovery when primary keys can be duplicated. - + ## Syntax @@ -30,17 +28,17 @@ WITH ( ## Parameters -| Parameter Names | Description | -| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| type | Required. Specify if the sink should be upsert or append-only. If creating an upsert sink, see the [Overview](/docs/current/data-delivery/) on when to define the primary key and Upsert sinks on limitations. | +| Parameter Names | Description | +| :--------------------------- | :------------------- | +| type | Required. Specify if the sink should be upsert or append-only. If creating an upsert sink, see the [Overview](/docs/current/data-delivery/) on when to define the primary key and [Upsert sinks](#upsert-sinks) on limitations. | | primary\_key | Optional. A string of a list of column names, separated by commas, that specifies the primary key of the ClickHouse sink. | | clickhouse.url | Required. Address of the ClickHouse server that you want to sink data to. Format: `http://ip:port`. The default port is 8123. | | clickhouse.user | Required. User name for accessing the ClickHouse server. | | clickhouse.password | Required. Password for accessing the ClickHouse server. | | clickhouse.database | Required. 
Name of the ClickHouse database that you want to sink data to. | | clickhouse.table | Required. Name of the ClickHouse table that you want to sink data to. | -| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10\. The behavior of this field also depends on the sink\_decouple setting:If sink\_decouple is true (the default), the default value of commit\_checkpoint\_interval is 10. If sink\_decouple is set to false, the default value of commit\_checkpoint\_interval is 1. If sink\_decouple is set to false and commit\_checkpoint\_interval is set to larger than 1, an error will occur. | -| clickhouse.delete.column | Optional. Add this parameter when using ClickHouse's ReplacingMergeTree and setting up the delete column. You can run an upsert sink using the ReplacingMergeTree engine. | +| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10.
The behavior of this field also depends on the `sink_decouple` setting:
  • If `sink_decouple` is true (the default), the default value of `commit_checkpoint_interval` is 10.
  • If `sink_decouple` is set to false, the default value of `commit_checkpoint_interval` is 1.
  • If `sink_decouple` is set to false and `commit_checkpoint_interval` is set to larger than 1, an error will occur.
| +| clickhouse.delete.column | Optional. You can run an upsert sink using the ReplacingMergeTree engine. When using the ReplacingMergeTree engine, you can specify the delete column with this parameter. | ### Upsert sinks @@ -170,7 +168,7 @@ WITH ( ## Data type mapping | RisingWave Data Type | ClickHouse Data Type | -| ---------------------- | --------------------------------------------------------------------------------------------- | +| :--------------------- | :------------------------------ | | boolean | Bool | | smallint | Int16 or UInt16 | | integer | Int32 or UInt32 | @@ -182,28 +180,20 @@ WITH ( | bytea | Not supported | | date | Date32 | | time without time zone | Not supported | -| timestamp | Not supported. You need to convert timestamp to timestamptz within RisingWave before sinking. | +| timestamp | Not supported. Please convert timestamp to timestamptz within RisingWave before sinking. | | timestamptz | DateTime64 | | interval | Not supported | | struct | Nested | | array | Array | | JSONB | Not supported | - -**NOTE** - -In ClickHouse, the `Nested` data type doe sn't support multiple levels of nesting. Therefore, when sinking RisingWave's `struct` data to ClickHouse, you need to flatten or restructure the nested data to align with ClickHouse's requirement. - - -**NOTE** +In ClickHouse, the `Nested` data type doesn't support multiple levels of nesting. Therefore, when sinking RisingWave's `struct` data to ClickHouse, you need to flatten or restructure the nested data to align with ClickHouse's requirement. Before v1.9, when inserting data into a ClickHouse sink, an error would be reported if the values were "nan (not a number)", "inf (infinity)", or "-inf (-infinity)". Since v1.9, we have made a change to this behavior. If the ClickHouse column is nullable, we will insert null values in such cases. If the column is not nullable, we will insert `0` instead. - - Please be aware that the range of specific values varies among ClickHouse types and RisingWave types. Refer to the table below for detailed information. -| ClickHouse type | RisingWave type | ClickHouse range | RisingWave range | -| --------------- | --------------- | --------------------------------------------------- | ------------------------------------------ | +| ClickHouse type | RisingWave type | ClickHouse range | RisingWave range | +| :-------------- | :-------------- | :------------------- | :---------------------- | | Date32 | DATE | 1900-01-01 to 2299-12-31 | 0001-01-01 to 9999-12-31 | | DateTime64 | TIMESTAMPTZ | 1900-01-01 00:00:00 to 2299-12-31 23:59:59.99999999 | 0001-01-01 00:00:00 to 9999-12-31 23:59:59 | diff --git a/integrations/destinations/cockroachdb.mdx b/integrations/destinations/cockroachdb.mdx index 11c7576c..5574d413 100644 --- a/integrations/destinations/cockroachdb.mdx +++ b/integrations/destinations/cockroachdb.mdx @@ -34,7 +34,7 @@ WITH ( ## Data type mapping | RisingWave Data Type | CockroachDB Data Type | -| --------------------------- | --------------------- | +| :-------------------------- | :-------------------- | | boolean | BOOL | | smallint | INT2 | | integer | INT4 | @@ -51,12 +51,9 @@ WITH ( | interval | INTERVAL | | JSONB | JSONB | | array | ARRAY | -| struct | unsupported | - - -**NOTE** +| struct | Unsupported | Only one-dimensional arrays in RisingWave can be sinked to CockroachDB. For array type, we only support `smallint`, `integer`, `bigint`, `real`, `double precision`, and `varchar` type now. 
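When a column would otherwise violate these restrictions, one workaround is to reshape the data in the sink query itself. The sketch below is illustrative only: it assumes a JDBC-style CockroachDB sink and hypothetical table and column names, flattening a `struct` column and keeping a one-dimensional `varchar` array that falls within the supported element types.

```sql
-- Hypothetical upstream table:
--   user_profiles(id INT PRIMARY KEY, contact STRUCT<email VARCHAR, phone VARCHAR>, tags VARCHAR[])
CREATE SINK user_profiles_crdb_sink AS
SELECT
    id,
    (contact).email AS contact_email,  -- flatten the struct, since struct itself is unsupported
    (contact).phone AS contact_phone,
    tags                               -- one-dimensional varchar array, which is supported
FROM user_profiles
WITH (
    connector = 'jdbc',  -- assumption: CockroachDB is reached through RisingWave's JDBC sink
    jdbc.url = 'jdbc:postgresql://cockroachdb:26257/defaultdb?user=root',
    table.name = 'user_profiles',
    type = 'upsert',
    primary_key = 'id'
);
```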
- + diff --git a/integrations/destinations/delta-lake.mdx b/integrations/destinations/delta-lake.mdx index 94099051..e2242816 100644 --- a/integrations/destinations/delta-lake.mdx +++ b/integrations/destinations/delta-lake.mdx @@ -29,14 +29,14 @@ WITH ( ## Parameters | Parameter Names | Description | -| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :--------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | type | Required. Currently, only append-only is supported. | -| location | Required. The file path that the Delta Lake table is reading data from, as specified when creating the Delta Lake table. For AWS, start with s3:// or s3a://;For GCS, start with gs://; For local files, start with file://. | -| s3.endpoint | Required. Endpoint of the S3\. For MinIO object store backend, it should be http://${MINIO_HOST}:${MINIO_PORT}. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). | +| location | Required. The file path that the Delta Lake table is reading data from, as specified when creating the Delta Lake table. For AWS, start with `s3://` or `s3a://`;For GCS, start with `gs://`; For local files, start with `file://`. | +| s3.endpoint | Required. Endpoint of the S3.
  • For the MinIO object store backend, it should be `http://${MINIO_HOST}:${MINIO_PORT}`.
  • For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html).
| | s3.access.key | Required. Access key of the S3 compatible object store. | | s3.secret.key | Required. Secret key of the S3 compatible object store. | | gcs.service.account | Required for GCS. Specifies the service account JSON file as a string. | -| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10\. The behavior of this field also depends on the sink\_decouple setting:If sink\_decouple is true (the default), the default value of commit\_checkpoint\_interval is 10. If sink\_decouple is set to false, the default value of commit\_checkpoint\_interval is 1. If sink\_decouple is set to false and commit\_checkpoint\_interval is set to larger than 1, an error will occur. | +| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10.
The behavior of this field also depends on the `sink_decouple` setting:
  • If `sink_decouple` is true (the default), the default value of `commit_checkpoint_interval` is 10.
  • If `sink_decouple` is set to false, the default value of `commit_checkpoint_interval` is 1.
  • If `sink_decouple` is set to false and `commit_checkpoint_interval` is set to larger than 1, an error will occur.
| ## Example diff --git a/integrations/destinations/elasticsearch.mdx b/integrations/destinations/elasticsearch.mdx index de888e34..a44be2cf 100644 --- a/integrations/destinations/elasticsearch.mdx +++ b/integrations/destinations/elasticsearch.mdx @@ -1,10 +1,10 @@ --- title: "Sink data from RisingWave to Elasticsearch" sidebarTitle: Elasticsearch -description: You can deliver the data that has been ingested and transformed in RisingWave to Elasticsearch to serve searches or analytics. +description: This guide describes how to sink data from RisingWave to Elasticsearch using the Elasticsearch sink connector in RisingWave. --- -This guide describes how to sink data from RisingWave to Elasticsearch using the Elasticsearch sink connector in RisingWave. +You can deliver the data that has been ingested and transformed in RisingWave to Elasticsearch to serve searches or analytics. [Elasticsearch](https://www.elastic.co/elasticsearch/) is a distributed, RESTful search and analytics engine capable of addressing a growing number of use cases. It centrally stores your data for lightning-fast search, fine‑tuned relevancy, and powerful analytics that scale with ease. @@ -14,18 +14,14 @@ The Elasticsearch sink connecter in RisingWave will perform index operations via * 5mb of updates * 5 seconds since the last flush (assuming new actions are queued) +The Elasticsearch sink connector in RisingWave provides at-least-once delivery semantics. Events may be redelivered in case of failures. + **PUBLIC PREVIEW** This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). - -**NOTE** - -The Elasticsearch sink connector in RisingWave provides at-least-once delivery semantics. Events may be redelivered in case of failures. - - ## Prerequisites * Ensure the Elasticsearch cluster (version 7.x or 8.x) is accessible from RisingWave. @@ -51,14 +47,15 @@ WITH ( ## Parameters -| Parameter | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Parameter | Description | +| :------------------- | :---------------- | | sink\_name | Name of the sink to be created. | -| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. | +| sink\_from | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. | | AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. 
See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | | primary\_key | Optional. The primary keys of the sink. If the primary key has multiple columns, set a delimiter in the delimiter parameter below to join them. | -| index | Required if index\_column is not set. Name of the Elasticsearch index that you want to write data to. | -| index\_column | This parameter enables you to create a sink that writes to multiple indexes dynamically. The sink decides which index to write to based on a column. It is mutually exclusive with the parameter index. Only one of them **can and must** be set. When index is set, the write index of Elasticsearch is index. When index\_column is set, the index of Elasticsearch is the value of this column, which must be the string type. Since Elasticsearch sink defaults to the first column as the key, it is not recommended to place this column as the first column. | +| index | Required if `index_column` is not set. Name of the Elasticsearch index that you want to write data to. | +| index\_column | Allows writing to multiple indexes dynamically based on the column's value. This parameter is mutually exclusive with `index`. When `index` is set, the write index is `index`; When `index_column` is set, the target index is the value of this column, and it must be of `string` type. Avoiding setting this column as the first column, because the sink defaults to the first column as the key. | +| routing\_column |Optional. Allows setting a column as a routing key, so that it can direct writes to specific shards based on its value. | | retry\_on\_conflict | Optional. Number of retry attempts after an optimistic locking conflict occurs. | | batch\_size\_kb | Optional. Maximum size (in kilobytes) for each request batch sent to Elasticsearch. If the data exceeds this size,the current batch will be sent. | | batch\_num\_messages | Optional. Maximum number of messages per request batch sent to Elasticsearch. If the number of messages exceeds this size,the current batch will be sent. | @@ -68,15 +65,12 @@ WITH ( | password | Optional. Password for accessing the Elasticsearch endpoint. It must be used with username. | | delimiter | Optional. Delimiter for Elasticsearch ID when the sink's primary key has multiple columns. | - -**NOTE** +For versions under 8.x, there was once a parameter `type`. In Elasticsearch 6.x, users could directly set the type, but starting from 7.x, it is set to not recommended and the default value is unified to `_doc`. In version 8.x, the type has been completely removed. See [Elasticsearch's official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/removal-of-types.html) for more details. -For versions under 8.x, there was once a parameter `type`. In Elasticsearch 6.x, users could directly set the type, but starting from 7.x, it is set to not recommended and the default value is unified to '\_doc.' In version 8.x, the type has been completely removed. See [Elasticsearch's official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/removal-of-types.html) for more details. +So, if you are using Elasticsearch 7.x, we set it to the official's recommended value, which is `_doc`. If you are using Elasticsearch 8.x, this parameter has been removed by the Elasticsearch official, so no setting is required. - -So, if you are using Elasticsearch 7.x, we set it to the official's recommended value, which is '\_doc'. 
If you are using Elasticsearch 8.x, this parameter has been removed by the Elasticsearch official, so no setting is required. -### Notes about primary keys and Elasticsearch IDs +## Primary keys and Elasticsearch IDs The Elasticsearch sink defaults to the `upsert` sink type. It does not support the `append-only` sink type. @@ -89,7 +83,7 @@ If you don't want to customize your Elasticsearch ID, RisingWave will use the fi ElasticSearch uses a mechanism called [dynamic field mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-field-mapping.html) to dynamically create fields and determine their types automatically. It treats all integer types as long and all floating-point types as float. To ensure data types in RisingWave are mapped to the data types in Elasticsearch correctly, we recommend that you specify the mapping via [index templates](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html) or [dynamic templates](https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-templates.html) before creating the sink. | RisingWave Data Type | ElasticSearch Field Type | -| --------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| :-------------------------- | :------------------------ | | boolean | boolean | | smallint | long | | integer | long | @@ -108,11 +102,7 @@ ElasticSearch uses a mechanism called [dynamic field mapping](https://www.elasti | array | array | | JSONB | object (RisingWave's Elasticsearch sink will send JSONB as a JSON string, and Elasticsearch will convert it into an object) | - -**NOTE** - Elasticsearch doesn't require users to explicitly `CREATE TABLE`. Instead, it infers the schema on-the-fly based on the first record ingested. For example, if a record contains a jsonb `{v1: 100}`, v1 will be inferred as a long type. However, if the next record is `{v1: "abc"}`, the ingestion will fail because `"abc"` is inferred as a string and the two types are incompatible. This behavior should be noted, or your data may be less than it should be. In terms of monitoring, you can check out Grafana, where there is a panel for all sink write errors. - diff --git a/integrations/destinations/google-cloud-storage.mdx b/integrations/destinations/google-cloud-storage.mdx new file mode 100644 index 00000000..cba12043 --- /dev/null +++ b/integrations/destinations/google-cloud-storage.mdx @@ -0,0 +1,46 @@ +--- + title: Sink data to Google Cloud Storage + sidebarTitle: Google Cloud Storage + description: This guide describes how to sink data from RisingWave to Google Cloud Storage sink using GCS connector in RisingWave. +--- + +[Google Cloud Storage](https://cloud.google.com/storage/docs) is a RESTful online file storage web service for storing and accessing data on Google Cloud Platform infrastructure. + +## Syntax + +```sql +CREATE SINK [ IF NOT EXISTS ] sink_name +[FROM sink_from | AS select_query] +WITH ( + connector='gcs', + connector_parameter = 'value', ... +); +``` + +## Parameters + +| Parameter names | Description | +|-|-| +| connector | Required. Support the GCS connector only.| +| gcs.bucket_name | Required. The name of the bucket where the sink data is stored in. | +| gcs.credential | Required. Base64-encoded credential key obtained from the GCS service account key JSON file. 
To get this JSON file, refer to the [GCS documentation on creating service account keys](https://cloud.google.com/iam/docs/keys-create-delete#iam-service-account-keys-create-console).
  • To encode it in base64, run the following command: `cat ~/Downloads/rwc-byoc-test-464bdd851bce.json \| base64 -b 0 \| pbcopy`, and then paste the output as the value for this parameter.
  • If this field is not specified, ADC (application default credentials) will be used.
| +| gcs.service_account| Optional. The service account of the GCS sink. If `gcs.credential` or ADC is not specified, the credentials will be derived from the service account.| +| gcs.path | Required. The directory where the sink file is located. | +| type | Required. Defines the type of the sink. Options include `append-only` or `upsert`. | + +## Example + +```sql +CREATE SINK gcs_sink AS SELECT v1 +FROM t1 +WITH ( + connector='gcs', + gcs.path = '', + gcs.bucket_name = '', + gcs.credential = '', + gcs.service_account = '' + type = 'append-only', +)FORMAT PLAIN ENCODE PARQUET(force_append_only=true); +``` + +For more information about encode `Parquet` or `JSON`, see [Sink data in parquet or json encode](/docs/current/data-delivery/). \ No newline at end of file diff --git a/integrations/destinations/google-pub-sub.mdx b/integrations/destinations/google-pub-sub.mdx index 42db5f36..9b0ff62e 100644 --- a/integrations/destinations/google-pub-sub.mdx +++ b/integrations/destinations/google-pub-sub.mdx @@ -23,7 +23,7 @@ FORMAT data_format ENCODE data_encode [ ( ## Basic parameter | Parameter | Description | -| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | pubsub.project\_id | Required. The Pub/Sub Project ID. | | pubsub.topic | Required. The Pub/Sub topic to publish messages. | | pubsub.endpoint | Required. The Pub/Sub endpoint URL. | @@ -32,18 +32,16 @@ FORMAT data_format ENCODE data_encode [ ( ## FORMAT and ENCODE option - -**NOTE** - + These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause. - + | Field | Note | -| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| data\_format | Data format. Allowed format: PLAIN. | -| data\_encode | Data encode. Supported encode: JSON. | -| force\_append\_only | Required by default and must be true, which forces the sink to be PLAIN (also known as append-only). | -| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). | +| :------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| data\_format | Data format. Allowed format: `PLAIN`. 
| +| data\_encode | Data encode. Supported encode: `JSON`. | +| force\_append\_only | Required by default and must be `true`, which forces the sink to be `PLAIN` (also known as append-only). | +| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. | ## Example You can test the function locally before you deploying it. See guide on how to [Test locally with the Pub/Sub emulator](https://cloud.google.com/functions/docs/local-development). diff --git a/integrations/destinations/mongodb.mdx b/integrations/destinations/mongodb.mdx index b908ee08..c0671d29 100644 --- a/integrations/destinations/mongodb.mdx +++ b/integrations/destinations/mongodb.mdx @@ -22,19 +22,19 @@ WITH ( ## Parameters -| **Parameter Name** | **Description** | -| -------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Parameter Name | Description | +| :------------------------------- | :----------- | | mongodb.url | The URL of MongoDB. | -| type | Defines the type of the sink. Options include append-only or upsert. | -| collection.name | The collection name where data should be written to or read from. For sinks, the format is db\_name.collection\_name. Data can also be written to dynamic collections; see collection.name.field below for more information. | -| collection.name.field | Optional. The dynamic collection name where data should be sunk to. If specified, the field value will be used as the collection name. The collection name format is the same as collection.name. If the field value is null or an empty string, then the collection.name will be used as a fallback destination. | -| collection.name.field.drop | Optional. Controls whether the field value of collection.name.field should be dropped when sinking. Set this option to true to avoid the duplicate values of collection.name.field being written to the result collection. | -| mongodb.bulk\_write.max\_entries | Optional. The maximum entries that will accumulate before performing the bulk write. Defaults to 1024\. Default value is 1024. | +| type | Defines the type of the sink. Options include `append-only` or `upsert`. | +| collection.name | The collection name where data should be written to or read from. For sinks, the format is `db_name.collection_name`. Data can also be written to dynamic collections, see `collection.name.field` below for more information. | +| collection.name.field | Optional. The dynamic collection name where data should be sunk to.
  • If specified, the field value will be used as the collection name. The collection name format is the same as `collection.name`.
  • If the field value is null or an empty string, then the `collection.name` will be used as a fallback destination.
| +| collection.name.field.drop | Optional. Controls whether the field value of `collection.name.field` should be dropped when sinking. Set this option to `true` to avoid the duplicate values of `collection.name.field` being written to the result collection. | + ## Data type mapping -| **MongoDB Type** | **RisingWave Type** | -| ---------------- | --------------------------- | +| MongoDB Type | RisingWave Type | +| :--------------- | :-------------------------- | | Boolean | BOOLEAN | | 32-bit integer | SMALLINT | | 32-bit integer | INTEGER | @@ -92,14 +92,14 @@ WITH ( Assuming the schema of `t2` is: | name | type | pk | -| ----- | ---- | -- | +| :---- | :--- | :- | | id | int | ✔ | | value | text | | Given the record: | id | value | -| -- | ------------------- | +| :- | :------------------ | | 1 | 'example of record' | The record written to MongoDB will be: @@ -108,11 +108,9 @@ The record written to MongoDB will be: { "_id": 1, "id": 1, "value": "example of record" } ``` - -**NOTE** - + No redundant `id` field will exist if the primary key of `t2` is `_id`. - + ```sql compound key CREATE TABLE t3( diff --git a/integrations/destinations/mqtt.mdx b/integrations/destinations/mqtt.mdx index 5e32c23e..8035dffa 100644 --- a/integrations/destinations/mqtt.mdx +++ b/integrations/destinations/mqtt.mdx @@ -59,16 +59,16 @@ After the sink is created, you will continuously consume the data in the MQTT to ### Parameters | Field | Notes | -| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| url | Required. The URL of the broker to connect to, e.g., tcp://localhost. Must be prefixed with tcp://, mqtt://, ssl://, or mqtts:// to denote the protocol. mqtts:// and ssl:// use native certificates if no CA is specified. | +| :----------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| url | Required. The URL of the broker to connect to, e.g., tcp://localhost. Must be prefixed with `tcp://`, `mqtt://`, `ssl://`, or `mqtts://` to denote the protocol. `mqtts://` and `ssl://` use native certificates if no CA is specified. | | qos | Optional. The quality of service for publishing messages. Defaults to at\_most\_once. Options include at\_most\_once, at\_least\_once, or exactly\_once. | | username | Optional. Username for the MQTT broker. | | password | Optional. Password for the MQTT broker. | | client\_prefix | Optional. Prefix for the MQTT client ID. Defaults to "risingwave". | -| clean\_start | Optional. Determines if all states from queues are removed when the client disconnects. If true, the broker clears all client states upon disconnect; if false, the broker retains the client state and resumes pending operations upon reconnection. | +| clean\_start | Optional. Determines if all states from queues are removed when the client disconnects.
  • If `true`, the broker clears all client states upon disconnect;
  • If `false`, the broker retains the client state and resumes pending operations upon reconnection.
| | inflight\_messages | Optional. Maximum number of inflight messages. Defaults to 100. | -| tls.client\_cert | Optional. Path to the client's certificate file (PEM) or a string with the certificate content. Required for client authentication. Can use fs:// prefix for file paths. | -| tls.client\_key | Optional. Path to the client's private key file (PEM) or a string with the private key content. Required for client authentication. Can use fs:// prefix for file paths. | -| topic | Required. The topic name to subscribe or publish to. Can include wildcard topics, e.g., /topic/#. | +| tls.client\_cert | Optional. Path to the client's certificate file (PEM) or a string with the certificate content. Required for client authentication. Can use `fs://` prefix for file paths. | +| tls.client\_key | Optional. Path to the client's private key file (PEM) or a string with the private key content. Required for client authentication. Can use `fs://` prefix for file paths. | +| topic | Required. The topic name to subscribe or publish to. Can include wildcard topics, e.g., `/topic/#`. | | retain | Optional. Whether the message should be retained by the broker. | | r#type | Required. Type identifier. | diff --git a/integrations/destinations/mysql.mdx b/integrations/destinations/mysql.mdx index 36be65e9..a15d9aa6 100644 --- a/integrations/destinations/mysql.mdx +++ b/integrations/destinations/mysql.mdx @@ -1,14 +1,14 @@ --- title: "Sink data from RisingWave to MySQL with the JDBC connector" sidebarTitle: MySQL -description: This guide will introduce how to sink data from RisingWave to JDBC-available databases using the JDBC sink connector. MySQL is a commonly used RDS with a JDBC driver and it is available as a cloud database through AWS for easy setup and maintenance. We will show you how to configure MySQL and RisingWave to create a MySQL sink. The configurations for RisingWave when connecting to any JDBC-available database will be the same. +description: This guide will introduce how to sink data from RisingWave to JDBC-available databases using the JDBC sink connector. --- - -**NOTE** +MySQL is a commonly used RDS with a JDBC driver and it is available as a cloud database through AWS for easy setup and maintenance. We will show you how to configure MySQL and RisingWave to create a MySQL sink. The configurations for RisingWave when connecting to any JDBC-available database will be the same. + The supported MySQL versions are 5.7 and 8.0.x - + ## Set up a MySQL database @@ -118,15 +118,15 @@ WITH ( All `WITH` options are required. | Parameter or clause | Description | -| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :------------- | | sink\_name | Name of the sink to be created. | -| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. | -| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified.See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | -| connector | Sink connector type must be 'jdbc' for MySQL sink. | +| sink\_from | A clause that specifies the direct source from which data will be output. 
`sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. | +| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | +| connector | Sink connector type must be `jdbc` for MySQL sink. | | jdbc.url | The JDBC URL of the destination database necessary for the driver to recognize and connect to the database. | | jdbc.query.timeout | Specifies the timeout for the operations to downstream. If not set, the default is 10 minutes. | | table.name | The table in the destination database you want to sink to. | -| type | Data format. Allowed formats: append-only: Output data with insert operations. upsert: Output data as a changelog stream. | +| type | Data format. Allowed formats:
  • `append-only`: Output data with insert operations.
  • `upsert`: Output data as a changelog stream.
| | primary\_key | Required if type is upsert. The primary key of the downstream table. | ## Sink data from RisingWave to MySQL @@ -184,6 +184,6 @@ For the MySQL data type mapping table, see the [Data type mapping table](/docs/c Additional notes regarding sinking data to MySQL: -* Note that array data types in RisingWave when sinked to MySQL will be converted to a string. Only one-dimensional arrays can be sinked to MySQL. For instance, `ARRAY['Value 1', 'Value 2']` when sinked to MySQL will be converted to the string `Value 1, Value 2`. +* Array data types in RisingWave when sinked to MySQL will be converted to a string. Only one-dimensional arrays can be sinked to MySQL. For instance, `ARRAY['Value 1', 'Value 2']` when sinked to MySQL will be converted to the string `Value 1, Value 2`. * For array type, we only support `smallint`, `integer`, `bigint`, `real`, `double precision`, and `varchar` type now. * It's better to set `connectionTimeZone=UTC` in `jdbc.url` to get the correct `timestamptz` type data. For more details, see [MySQL's documentation](https://dev.mysql.com/doc/connector-j/en/connector-j-usagenotes-known-issues-limitations.html). diff --git a/integrations/destinations/nats-and-nats-jetstream.mdx b/integrations/destinations/nats-and-nats-jetstream.mdx index c3c56923..eb6c398a 100644 --- a/integrations/destinations/nats-and-nats-jetstream.mdx +++ b/integrations/destinations/nats-and-nats-jetstream.mdx @@ -45,14 +45,9 @@ WITH ( After the sink is created, RisingWave will continuously sink data to the NATS subject in append-only mode. - -**NOTE** - + The NATS sink connector in RisingWave provides at-least-once delivery semantics. Events may be redelivered in case of failures. - - - -**NOTE** + According to the [NATS documentation](https://docs.nats.io/running-a-nats-service/nats%5Fadmin/jetstream%5Fadmin/naming), stream names must adhere to subject naming rules as well as be friendly to the file system. Here are the recommended guidelines for stream names: @@ -62,15 +57,14 @@ According to the [NATS documentation](https://docs.nats.io/running-a-nats-servic * Keep the name length limited to 32 characters as the JetStream storage directories include the account, stream name, and consumer name. * Avoid using reserved file names like `NUL` or `LPT1`. * Be cautious of case sensitivity in file systems. To prevent collisions, ensure that stream or account names do not clash due to case differences. For example, `Foo` and `foo` would collide on Windows or macOS systems. - -### Parameters +## Parameters | Field | Notes | -| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| server\_url | Required. URLs of the NATS server, in the format of _address_:_port_. If multiple addresses are specified, use commas to separate them. | +| :-------------------- | :--------------------------- | +| server\_url | Required. URLs of the NATS server, in the format of `address:port`. If multiple addresses are specified, use commas to separate them. | | subject | Required. NATS subject that you want to sink data to. | -| connect\_mode | Required. Authentication mode for the connection. Allowed values: plain: No authentication; user\_and\_password: Use user name and password for authentication. 
For this option, username and password must be specified; credential: Use JSON Web Token (JWT) and NKeys for authentication. For this option, jwt and nkey must be specified. | +| connect\_mode | Required. Authentication mode for the connection. Allowed values:
  • `plain`: No authentication;
  • `user_and_password`: Use username and password for authentication. For this option, `username` and `password` must be specified;
  • `credential`: Use JSON Web Token (JWT) and NKeys for authentication. For this option, `jwt` and `nkey` must be specified.
| | jwt and nkey | JWT and NKEY for authentication. For details, see [JWT](https://docs.nats.io/running-a-nats-service/configuration/securing%5Fnats/auth%5Fintro/jwt) and [NKeys](https://docs.nats.io/running-a-nats-service/configuration/securing%5Fnats/auth%5Fintro/nkey%5Fauth). | -| username and password | Conditional. The client user name and password. Required when connect\_mode is user\_and\_password. | -| type | Required. Sink data type. Its value should be append-only. | +| username and password | Conditional. The client user name and password. Required when `connect_mode` is `user_and_password`. | +| type | Required. Sink data type. Its value should be `append-only`. | diff --git a/integrations/destinations/opensearch.mdx b/integrations/destinations/opensearch.mdx index 6a2a0048..9802eb67 100644 --- a/integrations/destinations/opensearch.mdx +++ b/integrations/destinations/opensearch.mdx @@ -6,9 +6,11 @@ description: This guide describes how to sink data from RisingWave to OpenSearch OpenSearch is the flexible, scalable, open-source way to build solutions for data-intensive applications. For more information about OpenSearch, see [OpenSearch official website](https://opensearch.org/). -Premium Edition Feature + +**PREMIUM EDITION FEATURE** This feature is only available in the premium edition of RisingWave. The premium edition offers additional advanced features and capabilities beyond the free and community editions. If you have any questions about upgrading to the premium edition, please contact our sales team at [sales@risingwave-labs.com](mailto:sales@risingwave-labs.com). + ## Prerequisites @@ -35,14 +37,14 @@ WITH ( ## Parameters -| Parameter | Description | -| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Parameter | Description | +| :--------------- | :-------------------- | | sink\_name | Name of the sink to be created. | -| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. | +| sink\_from | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. | | AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | | primary\_key | Optional. The primary keys of the sink. If the primary key has multiple columns, set a delimiter in the delimiter parameter below to join them. | | index | Required if index\_column is not set. Name of the OpenSearch index that you want to write data to. | -| index\_column | This parameter enables you to create a sink that writes to multiple indexes dynamically. The sink decides which index to write to based on a column. It is mutually exclusive with the parameter index. Only one of them **can and must** be set. 
When index is set, the write index of OpenSearch is index. When index\_column is set, the index of OpenSearch is the value of this column, which must be the string type. Since OpenSearch sink defaults to the first column as the key, it is not recommended to place this column as the first column. | +| index\_column | This parameter enables you to create a sink that writes to multiple indexes dynamically. The sink decides which index to write to based on a column. It is mutually exclusive with the parameter index. Only one of them **can and must** be set.
  • When `index` is set, the write index of OpenSearch is the value of `index`.
  • When `index_column` is set, the index of OpenSearch is the value of this column, which must be the string type.
Since OpenSearch sink defaults to the first column as the key, it is not recommended to place this column as the first column. | | url | Required. URL of the OpenSearch REST API endpoint. | | username | Optional. opensearch user name for accessing the OpenSearch endpoint. It must be used with password. | | password | Optional. Password for accessing the OpenSearch endpoint. It must be used with username. | @@ -61,7 +63,7 @@ If you don't want to customize your OpenSearch ID, RisingWave will use the first OpenSearch uses a mechanism called [dynamic field mapping](https://opensearch.org/docs/latest/field-types/#dynamic-mapping) to dynamically create fields and determine their types automatically. It treats all integer types as long and all floating-point types as float. To ensure data types in RisingWave are mapped to the data types in OpenSearch correctly, we recommend that you specify the mapping via [index templates](https://opensearch.org/docs/latest/im-plugin/index-templates/). | RisingWave Data Type | OpenSearch Field Type | -| --------------------------- | --------------------------------------------------------------------------------------------------------------------- | +| :-------------------------- | :-------------------------------------------------------------------------------------------------------------------- | | boolean | boolean | | smallint | long | | integer | long | @@ -80,10 +82,7 @@ OpenSearch uses a mechanism called [dynamic field mapping](https://opensearch.or | array | array | | JSONB | object (RisingWave's OpenSearch sink will send JSONB as a JSON string, and OpenSearch will convert it into an object) | - -**NOTE** OpenSearch doesn't require users to explicitly `CREATE TABLE`. Instead, it infers the schema on-the-fly based on the first record ingested. For example, if a record contains a jsonb `{v1: 100}`, v1 will be inferred as a long type. However, if the next record is `{v1: "abc"}`, the ingestion will fail because `"abc"` is inferred as a string and the two types are incompatible. - This behavior may lead to missing records. For monitoring, see Grafana, where there is a panel for all sink write errors. diff --git a/integrations/destinations/postgresql.mdx b/integrations/destinations/postgresql.mdx index c0b4c4bc..fd655579 100644 --- a/integrations/destinations/postgresql.mdx +++ b/integrations/destinations/postgresql.mdx @@ -1,9 +1,10 @@ --- title: "Sink data from RisingWave to PostgreSQL" sidebarTitle: PostgreSQL -description: This guide will show you how to sink data from RisingWave to PostgreSQL using the JDBC connector. The sink parameters are similar to those for other JDBC-available databases, such as MySQL. However, we will cover the configurations specific to PostgreSQL and how to verify that data is successfully sunk. --- +This guide will show you how to sink data from RisingWave to PostgreSQL using the JDBC connector. The sink parameters are similar to those for other JDBC-available databases, such as MySQL. However, we will cover the configurations specific to PostgreSQL and how to verify that data is successfully sunk. + You can test out this process on your own device by using the `postgres-sink` demo in the [integration\_test directory](https://github.com/risingwavelabs/risingwave/tree/main/integration%5Ftests) of the RisingWave repository. 
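As a preview of where this guide ends up, the whole sink boils down to a single `CREATE SINK` statement over JDBC. The sketch below is illustrative only: the materialized view, table names, and connection string are hypothetical placeholders, and the way credentials are passed in `jdbc.url` may vary with your setup.

```sql
-- Minimal sketch of a PostgreSQL sink; every value here is a placeholder.
CREATE SINK postgres_sink
FROM my_materialized_view          -- or: AS SELECT ... FROM ...
WITH (
    connector = 'jdbc',
    jdbc.url = 'jdbc:postgresql://127.0.0.1:5432/mydb?user=myuser&password=123456',
    table.name = 'target_table',   -- table in the destination PostgreSQL database
    schema.name = 'public',        -- optional; defaults to public
    type = 'upsert',
    primary_key = 'id'             -- required because type is upsert
);
```

The full parameter list and a step-by-step walkthrough follow in the sections below.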
## Set up a PostgreSQL database @@ -38,15 +39,11 @@ For more login options, refer to the [RDS connection guide](https://docs.aws.ama To install PostgreSQL locally, see their [download options](https://www.postgresql.org/download/). - -**NOTE** - If you are using the demo version, connect to PostgreSQL with the following command. Ensure that all other programs are disconnected from port 5432. ```bash psql postgresql://myuser:123456@127.0.0.1:5432/mydb ``` - Ensure that the Postgres user is granted the following privileges on the used table with the following SQL query. @@ -96,16 +93,16 @@ WITH ( All `WITH` options are required unless noted. | Parameter or clause | Description | -| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :-------------- | | sink\_name | Name of the sink to be created. | -| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. | -| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified.See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | -| connector | Sink connector type must be 'jdbc' for PostgresQL sink. | +| sink\_from | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. | +| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | +| connector | Sink connector type must be `jdbc` for PostgresQL sink. | | jdbc.url | The JDBC URL of the destination database necessary for the driver to recognize and connect to the database. | | jdbc.query.timeout | Specifies the timeout for the operations to downstream. If not set, the default is 10 minutes. | | table.name | The table in the destination database you want to sink to. | | schema.name | Optional. The schema in the destination database you want to sink to. The default value is public. | -| type | Sink data type. Supported types: append-only: Sink data as INSERT operations. upsert: Sink data as UPDATE, INSERT and DELETE operations. | +| type | Sink data type. Supported types:
  • `append-only`: Sink data as INSERT operations.
  • `upsert`: Sink data as UPDATE, INSERT and DELETE operations.
| | primary\_key | Required if type is upsert. The primary key of the sink, which should match the primary key of the downstream table. | ## Sink data from RisingWave to PostgreSQL diff --git a/integrations/destinations/redis.mdx b/integrations/destinations/redis.mdx index 6155e3ec..adf54994 100644 --- a/integrations/destinations/redis.mdx +++ b/integrations/destinations/redis.mdx @@ -32,27 +32,26 @@ FORMAT data_format ENCODE data_encode [ ( ## Parameters -| Parameter Names | Description | -| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| redis.url | Required. Choose either the Redis cluster address or a non-cluster Redis address. If the address is a cluster address, it should be in the form of a JSON array, like redis.url= '\["redis://redis-server:6379/"\]'. If the address is a non-cluster address, it should be in the form of a string, like redis.url= 'redis://redis-server:6379/'. | -| primary\_key | Required. The primary keys of the sink. If necessary, use ',' to delimit the primary key columns. | +| Name | Description | +| :-------------- | :------------------ | +| redis.url | Required. Choose either the Redis cluster address or a non-cluster Redis address.
  • If the address is a cluster address, it should be in the form of a JSON array, like `redis.url = '["redis://redis-server:6379/"]'`.
  • If the address is a non-cluster address, it should be in the form of a string, like `redis.url = 'redis://redis-server:6379/'`.
| +| primary\_key | Required. The primary keys of the sink. If necessary, use `,` to delimit the primary key columns. | ## FORMAT and ENCODE options - -**NOTE** + These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause - + | Field | Notes | -| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| data\_format | Data format. Allowed formats: PLAIN: Output data with insert operations. UPSERT: Output data as a changelog stream. | -| data\_encode | Data encoding. Supported encodings: JSON:date: number of days since the Common Era (CE).interval: P\Y\M\DT\H\M\S format string.time without time zone: number of milliseconds past the last midnight.timestamp: number of milliseconds since the Epoch.TEMPLATE: converts data to the string specified by key\_format/value\_format. | -| force\_append\_only | If true, forces the sink to be PLAIN (also known as append-only), even if it cannot be. | -| key\_format | Required if data\_encode is TEMPLATE. Specify the format for the key as a string. | -| value\_format | Required if data\_encode is TEMPLATE. Specify the format for the value as a string. | -| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). | +| :------------------ | :-------------------------------------------------- | +| data\_format | Data format. Allowed formats:
  • `PLAIN`: Output data with insert operations.
  • `UPSERT`: Output data as a changelog stream.
| +| data\_encode | Data encoding. Supported encodings:
  • `JSON`:
    • `date`: number of days since the Common Era (CE).
    • `interval`: `P<years>Y<months>M<days>DT<hours>H<minutes>M<seconds>S` format string.
    • `time without time zone`: number of milliseconds past the last midnight.
    • `timestamp`: number of milliseconds since the Epoch.
  • `TEMPLATE`: converts data to the string specified by `key_format`/`value_format`.
| +| force\_append\_only | If true, forces the sink to be `PLAIN` (also known as append-only), even if it cannot be. | +| key\_format | Required if `data_encode` is `TEMPLATE`. Specify the format for the key as a string. | +| value\_format | Required if `data_encode` is `TEMPLATE`. Specify the format for the value as a string. | +| key\_encode | Optional.
  • When specified, the key encode can only be `TEXT`, and the sink must have exactly one primary key column, whose type is `varchar`, `bool`, `smallint`, `int`, or `bigint`;
  • When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`.
| ## Example diff --git a/integrations/destinations/snowflake.mdx b/integrations/destinations/snowflake.mdx index 651555ea..3b0e6fd6 100644 --- a/integrations/destinations/snowflake.mdx +++ b/integrations/destinations/snowflake.mdx @@ -27,11 +27,9 @@ This feature is in the public preview stage, meaning it's nearing the final prod * Ensure the S3 user account has `WRITE` permission. * Ensure that Snowflake and S3 are set up in the same manner as described in the [Automating Snowpipe for Amazon S3](https://docs.snowflake.com/en/user-guide/data-load-snowpipe-auto-s3), as RisingWave is only responsible for writing data to S3. - -**NOTE** - + RisingWave will not be responsible for deleting data already imported by S3\. You can manually set the lifecycle configuration of your S3 bucket to clear out unnecessary data. See [Lifecycle configuration](https://docs.aws.amazon.com/AmazonS3/latest/userguide/how-to-set-lifecycle-configuration-intro.html) and [Delete staged files](https://docs.snowflake.com/en/user-guide/data-load-snowpipe-manage#deleting-staged-files-after-snowpipe-loads-the-datafor) for more details. - + ## Syntax Use the following syntax to create a sink in RisingWave: @@ -50,12 +48,12 @@ WITH ( All parameters are required unless specified otherwise. | Parameter | Description | -| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------- | :------------- | | s3.bucket\_name | The S3 bucket where intermediate sink files will be stored. | -| s3.path | Optional. The S3 path to be specified. If specified, the actual file location would be \://\/\. Otherwise, it would be \://\. | +| s3.path | Optional. The S3 path to be specified.
  • If specified, the actual file location would be `<s3.bucket_name>://<s3.path>/<file_name>`.
  • If not, it would be `<s3.bucket_name>://<file_name>`.
| | s3.credentials.access | S3 access credentials. | | s3.credentials.secret | S3 secret credentials. | -| s3.region\_name | The S3 region, e.g., us-east-2. | +| s3.region\_name | The S3 region, e.g., `us-east-2`. | | force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. | ## Data type mapping @@ -63,7 +61,7 @@ All parameters are required unless specified otherwise. The following table shows the corresponding data types between RisingWave and Snowflake. For details on native RisingWave data types, see [Overview of data types](/docs/current/sql-data-types/). | RisingWave type | Snowflake type | -| --------------- | ----------------------------------------------------------------- | +| :-------------- | :---------------------------------------------------------------- | | SMALLINT | SMALLINT | | INTEGER | INTEGER | | BIGINT | BIGINT | diff --git a/integrations/destinations/sql-server.mdx b/integrations/destinations/sql-server.mdx index 43c2a96c..2e8cc291 100644 --- a/integrations/destinations/sql-server.mdx +++ b/integrations/destinations/sql-server.mdx @@ -33,7 +33,7 @@ WITH ( ## Parameters | Parameter Names | Description | -| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :--------------------------------------------------------------------------------------------------------------------------------- | | type | Required. Allowed values: append-only and upsert. | | force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. | | primary\_key | Conditional. The primary keys of the sink. Use ',' to delimit the primary key columns. Primary keys are required for upsert sinks. | @@ -49,7 +49,7 @@ WITH ( The following table shows the corresponding data types between RisingWave and SQL Server that should be specified when creating a sink. For details on native RisingWave data types, see [Overview of data types](/docs/current/sql-data-types/). | SQL Server type | RisingWave type | -| --------------- | --------------------------- | +| :-------------- | :-------------------------- | | bit | boolean | | smallint | smallint | | int | integer | diff --git a/integrations/destinations/starrocks.mdx b/integrations/destinations/starrocks.mdx index 26b2c532..1e072696 100644 --- a/integrations/destinations/starrocks.mdx +++ b/integrations/destinations/starrocks.mdx @@ -30,8 +30,8 @@ WITH ( All parameters are required unless specified otherwise. -| Parameter names | Description | -| ---------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Parameter names | Description | +| :--------------------------- | :------------- | | starrocks.host | The StarRocks host address. | | starrocks.query\_port | The port to the MySQL server of the StarRocks frontend. | | starrocks.http\_port | The port to the HTTP server of the StarRocks frontend. | @@ -39,11 +39,11 @@ All parameters are required unless specified otherwise. 
| starrocks.password | The password associated with the user. | | starrocks.database | The StarRocks database where the target table is located | | starrocks.table | The StarRocks table you want to sink data to. | -| starrocks.partial\_update | Optional. If you set the value to "true", the partial update optimization feature of StarRocks will be enabled. This feature enhances ingestion performance in scenarios where there is a need to update a large number of rows with only a small number of columns. You can learn more about this feature in the [partial update optimization](https://docs.starrocks.io/docs/sql-reference/sql-statements/data-manipulation/UPDATE/#partial-updates-in-column-mode-since-v31) section of the StarRocks documentation. | -| type | Data format. Allowed formats: append-only: Output data with insert operations. upsert: Output data as a chagelog stream. In StarRocks, Primary Key table must be selected. | -| force\_append\_only | If true, forces the sink to be append-only, even if it cannot be. | -| primary\_key | Required if type is upsert. The primary key of the downstream table. | -| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10\. The behavior of this field also depends on the sink\_decouple setting:If sink\_decouple is true (the default), the default value of commit\_checkpoint\_interval is 10. If sink\_decouple is set to false, the default value of commit\_checkpoint\_interval is 1. If sink\_decouple is set to false and commit\_checkpoint\_interval is set to larger than 1, an error will occur. | +| starrocks.partial\_update | Optional. Set it to `true` to improve performance when you need to update many rows but only change a few columns in each row.| +| type | Data format. Allowed formats:
  • `append-only`: Output data with insert operations.
  • `upsert`: Output data as a changelog stream. In StarRocks, a Primary Key table must be selected.
| +| force\_append\_only | If `true`, forces the sink to be append-only, even if it cannot be. | +| primary\_key | Required if type is `upsert`. The primary key of the downstream table. | +| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10. The behavior of this field also depends on the `sink_decouple` setting:
  • If `sink_decouple` is true (the default), the default value of `commit_checkpoint_interval` is 10.
  • If `sink_decouple` is set to false, the default value of `commit_checkpoint_interval` is 1.
  • If `sink_decouple` is set to false and `commit_checkpoint_interval` is set to larger than 1, an error will occur.
| ## Examples @@ -69,7 +69,7 @@ FROM bhv_mv WITH ( The following table shows the corresponding data type in RisingWave that should be specified when creating a sink. For details on native RisingWave data types, see [Overview of data types](/docs/current/sql-data-types/). | StarRocks type | RisingWave type | -| -------------- | ------------------------------------------------------------------------------------------------- | +| :------------- | :---------------------------------- | | BOOLEAN | BOOLEAN | | SMALLINT | SMALLINT | | INT | INTEGER | @@ -89,9 +89,5 @@ The following table shows the corresponding data type in RisingWave that should | JSON | JSONB | | BIGINT | SERIAL | - -**NOTE** -Before v1.9, when inserting data into a StarRocks sink, an error would be reported if the values were "nan (not a number)", "inf (infinity)", or "-inf (-infinity)". Since v1.9, we have made a change to the behavior. If a decimal value is out of bounds or represents "inf", "-inf", or "nan", we will insert null values. - - +If a decimal value is out of bounds or represents `inf`, `-inf`, or `nan`, RisingWave will insert null values. \ No newline at end of file diff --git a/integrations/destinations/tidb.mdx b/integrations/destinations/tidb.mdx index 0016ea8d..62f7100a 100644 --- a/integrations/destinations/tidb.mdx +++ b/integrations/destinations/tidb.mdx @@ -11,7 +11,7 @@ For the syntax, settings, and examples, see [Sink data from RisingWave to MySQL The following table shows the corresponding data types between RisingWave and TiDB. For details on native RisingWave data types, see [Overview of data types](/docs/current/sql-data-types/). | RisingWave type | TiDB type | -| --------------- | -------------------------------------------------- | +| :-------------- | :------------------------------------------------- | | BOOLEAN | BOOLEAN | | SMALLINT | TINYINT/SMALLINT | | INT | INT/MEDIUMINT | diff --git a/integrations/destinations/webhdfs.mdx b/integrations/destinations/webhdfs.mdx new file mode 100644 index 00000000..c3468368 --- /dev/null +++ b/integrations/destinations/webhdfs.mdx @@ -0,0 +1,42 @@ +--- + title: Sink data to WebHDFS + sidebarTitle: WebHDFS + description: This guide describes how to sink data from RisingWave to WebHDFS. +--- + +As a workaround for HDFS, WebHDFS allows external clients to execute Hadoop file system operations without necessarily running on the Hadoop cluster itself. Therefore, it reduces the dependency on the Hadoop environment when using HDFS. + +## Syntax + +```sql +CREATE SINK [ IF NOT EXISTS ] sink_name +[FROM sink_from | AS select_query] +WITH ( + connector='webhdfs', + connector_parameter = 'value', ... +); +``` + +## Parameters + +| Parameter names | Description | +|-|-| +| connector | Required. Support the WebHDFS connector only. | +| webhdfs.endpoint | Required. The endpoint for the WebHDFS service. | +| webhdfs.path | Required. The directory where the sink file is located. | +| type | Required. Defines the type of the sink. Options include `append-only` or `upsert`. | + +## Example + +```sql +CREATE SINK webhdfs_sink AS SELECT v1 +FROM t1 +WITH ( + connector='webhdfs', + webhdfs.path = '', + webhdfs.endpoint = '', + type = 'append-only', +)FORMAT PLAIN ENCODE PARQUET(force_append_only=true); +``` + +For more information about encode `Parquet` or `JSON`, see [Sink data in parquet or json encode](/docs/current/data-delivery/). 
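The closing link above also covers JSON output. If Parquet is not required, a JSON-encoded variant of the same sink might look like the sketch below; the endpoint and path values are placeholders, and we assume `force_append_only` is accepted inside `ENCODE JSON(...)` the same way it is for Parquet.

```sql
-- Hedged sketch: same WebHDFS sink as above, but emitting JSON files.
CREATE SINK webhdfs_json_sink AS SELECT v1
FROM t1
WITH (
    connector = 'webhdfs',
    webhdfs.endpoint = '<webhdfs_endpoint>',   -- placeholder
    webhdfs.path = '<directory_path>',         -- placeholder
    type = 'append-only'
) FORMAT PLAIN ENCODE JSON(force_append_only = true);
```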
\ No newline at end of file diff --git a/integrations/other/dbt.mdx b/integrations/other/dbt.mdx index 859a869c..1c9bebba 100644 --- a/integrations/other/dbt.mdx +++ b/integrations/other/dbt.mdx @@ -58,7 +58,7 @@ The dbt models for managing data transformations in RisingWave are similar to ty RisingWave accepts these [materializations](https://docs.getdbt.com/docs/build/materializations). | Materializations | Notes | -| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :--------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | table | This materialization creates a table. To use this materialization, add `{{ config(materialized='table') }}` to your model SQL files. | | view | Create a view. To use this materialization, add `{{ config(materialized='view') }}` to your model SQL files. | | ephemeral | This materialization uses [common table expressions](/docs/current/query-syntax-with-clause/) in RisingWave under the hood. To use this materialization, add `{{ config(materialized='ephemeral') }}` to your model SQL files. | diff --git a/integrations/sources/amazon-msk.mdx b/integrations/sources/amazon-msk.mdx index 3efcdd08..12f5aaf3 100644 --- a/integrations/sources/amazon-msk.mdx +++ b/integrations/sources/amazon-msk.mdx @@ -183,9 +183,8 @@ WITH ( Then, you can count the records for accuracy. -``` +```sql SELECT * FROM s; - ``` ## Access MSK using IAM @@ -208,7 +207,7 @@ RisingWave requires the following permissions to access MSK: To access MSK using IAM, you need to use the `AWS_MSK_IAM` SASL mechanism. You also need to specify the following parameters. | Parameter | Notes | -| ----------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :---------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | aws.region | Required. AWS service region. For example, US East (N. Virginia). | | aws.endpoint | Optional. URL of the entry point for the AWS Kinesis service. | | aws.credentials.access\_key\_id | Required. This field indicates the access key ID of AWS. 
| diff --git a/integrations/sources/apache-iceberg.mdx b/integrations/sources/apache-iceberg.mdx index b351e333..7be6ffa8 100644 --- a/integrations/sources/apache-iceberg.mdx +++ b/integrations/sources/apache-iceberg.mdx @@ -20,33 +20,31 @@ WITH ( ); ``` - -**NOTE** - + You don’t need to specify the column name for the Iceberg source, as RisingWave can derive it from the Iceberg table metadata directly. Use [DESCRIBE](/docs/current/sql-describe/) statement to view the column names and data types. - + ## Parameters | Field | Notes | -| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| type | Required. Allowed values: appendonly and upsert. | -| s3.endpoint | Optional. Endpoint of the S3\. For MinIO object store backend, it should be http://${MINIO_HOST}:${MINIO_PORT}. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). | -| s3.region | Optional. The region where the S3 bucket is hosted. Either s3.endpoint or s3.region must be specified. | +| :------------- | :---------------------------------- | +| type | Required. Allowed values: `append-only` and `upsert`. | +| s3.endpoint | Optional. Endpoint of the S3.
  • For MinIO object store backend, it should be `http://${MINIO_HOST}:${MINIO_PORT}`.
  • For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html).
| +| s3.region | Optional. The region where the S3 bucket is hosted. Either `s3.endpoint` or `s3.region` must be specified. | | s3.access.key | Required. Access key of the S3 compatible object store. | | s3.secret.key | Required. Secret key of the S3 compatible object store. | | database.name | Required. Name of the database that you want to ingest data from. | | table.name | Required. Name of the table that you want to ingest data from. | | catalog.name | Conditional. The name of the Iceberg catalog. It can be omitted for storage catalog but required for other catalogs. | -| catalog.type | Optional. The catalog type used in this table. Currently, the supported values are storage, rest, hive, jdbc, and glue. If not specified, storage is used. For details, see [Catalogs](#catalogs). | -| warehouse.path | Conditional. The path of the Iceberg warehouse. Currently, only S3-compatible object storage systems, such as AWS S3 and MinIO, are supported. It's required if the catalog.type is not rest. | -| catalog.url | Conditional. The URL of the catalog. It is required when catalog.type is not storage. | +| catalog.type | Optional. The catalog type used in this table. Currently, the supported values are `storage`, `rest`, `hive`, `jdbc`, and `glue`. If not specified, `storage` is used. For details, see [Catalogs](#catalogs). | +| warehouse.path | Conditional. The path of the Iceberg warehouse. Currently, only S3-compatible object storage systems, such as AWS S3 and MinIO, are supported. It's required if the `catalog.type` is not `rest`. | +| catalog.url | Conditional. The URL of the catalog. It is required when `catalog.type` is not `storage`. | ## Data type mapping RisingWave converts data types from Iceberg to RisingWave according to the following data type mapping table. | Iceberg Type | RisingWave Type | -| ------------ | --------------- | +| :----------- | :-------------- | | boolean | boolean | | integer | int | | long | bigint | diff --git a/integrations/sources/automq-kafka.mdx b/integrations/sources/automq-kafka.mdx index 5cc51652..eb9e6bae 100644 --- a/integrations/sources/automq-kafka.mdx +++ b/integrations/sources/automq-kafka.mdx @@ -20,11 +20,9 @@ Use Kafka’s command-line tools to create a topic. Ensure you have access to th ./kafka-topics.sh --create --topic example_topic --bootstrap-server 10.0.96.4:9092 --partitions 1 --replication-factor 1 ``` - -**NOTE** - + In this guide, `example_topic` and `10.0.96.4:9092` are used as examples of topic name and Kafka server address respectively. Please replace them with your actual topic name and Kafka server address. - + To check the result of the topic creation, use this command: @@ -79,10 +77,8 @@ In this guide, you can use JSON format and set the startup mode to `earliest` to SELECT * from your_source_name limit 1; ``` - -**NOTE** - + Replace `your_source_name` with the name you defined when creating the source. - + When you see actual results, that means that you have successfully ingested data from AutoMQ Kafka into RisingWave Cloud. You can now write more data into the topic, or transform the ingested data by creating materialized views in RisingWave Cloud. 
diff --git a/integrations/sources/azure-blob.mdx b/integrations/sources/azure-blob.mdx index 097a249a..f2107d25 100644 --- a/integrations/sources/azure-blob.mdx +++ b/integrations/sources/azure-blob.mdx @@ -11,7 +11,7 @@ Use the SQL statement below to connect RisingWave to Azure Blob Storage using Az ```sql CREATE SOURCE [ IF NOT EXISTS ] source_name schema_definition -[INCLUDE { file | offset } [AS ]] +[INCLUDE { file | offset | payload } [AS ]] WITH ( connector = 'azblob', connector_parameter = 'value', ... @@ -34,27 +34,27 @@ FORMAT data_format ENCODE data_encode ( ### Connector parameters | Field | Notes | -| -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------------------- | :--------------- | | azblob.container\_name | Required. The name of the container the data source is stored in. | | azblob.credentials.account\_name | Optional. The name of the Azure Blob Storage account. | | azblob.credentials.account\_key | Optional. The account key for the Azure Blob Storage account. | | azblob.endpoint\_url | Required. The URL of the Azure Blob Storage service endpoint. | | match\_pattern | Conditional. Set to find object keys in azblob.container\_name that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. | -| compression\_format | Optional. Specifies the compression format of the file being read. When set to gzip or gz, the file reader reads all files with the .gz suffix; when set to None or not defined, the file reader will automatically read and decompress .gz and .gzip files. | +| compression\_format | Optional. Specifies the compression format of the file being read. When set to gzip or gz, the file reader reads all files with the `.gz` suffix; when set to `None` or not defined, the file reader will automatically read and decompress `.gz` and `.gzip` files. | ### Other parameters | Field | Notes | -| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| :---------------- | :---------------- | | _data\_format_ | Supported data format: PLAIN. | | _data\_encode_ | Supported data encodes: CSV, JSON, PARQUET. | -| _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is header. Accepted values: 'true', 'false'. Default: 'true'. | -| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is \\n; for CSV encode, the delimiter can be one of ,, ;, E'\\t'. | +| _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is header. Accepted values: `true`, `false`. Default is `true`. | +| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is `\n`; for CSV encode, the delimiter can be one of `,`, `;`, `E'\t'`. | ### Additional columns | Field | Notes | -| -------- | --------------------------------------------------------------------------------------------------------------------------- | +| :------- | :---------- | | _file_ | Optional. The column contains the file name where current record comes from. | | _offset_ | Optional. 
The column contains the corresponding bytes offset (record offset for parquet files) where current message begins | @@ -99,6 +99,20 @@ WITH ( match_pattern = '%Ring%*.ndjson', ) FORMAT PLAIN ENCODE JSON; ``` + +Use the `payload` keyword to ingest JSON data when you are unsure of the exact schema beforehand. Instead of defining specific column names and types at the very beginning, you can load all JSON data first and then prune and filter the data during runtime. Check the example below: + +```sql +CREATE TABLE table_include_payload (v1 int, v2 varchar) +INCLUDE payload +WITH ( + connector = 'azblob', + topic = 'azblob_1_partition_topic', + properties.bootstrap.server = 'message_queue:29092', + scan.startup.mode = 'earliest' +) FORMAT PLAIN ENCODE JSON; +``` +
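Once the raw JSON is loaded, the "prune and filter the data during runtime" step is ordinary SQL over the included payload column. The sketch below is an assumption-heavy illustration: it supposes the source table was created with `INCLUDE payload AS raw_payload` (an explicit alias, unlike the example above) and that the column is exposed as `JSONB`, so the standard JSON operators apply.

```sql
-- Hypothetical follow-up query on a table created with INCLUDE payload AS raw_payload.
SELECT
    (raw_payload ->> 'v1')::int AS v1,   -- extract and cast a field at query time
    raw_payload ->> 'v2'        AS v2
FROM table_with_raw_payload
WHERE (raw_payload ->> 'v1')::int > 10;  -- filter on the extracted value
```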
```sql diff --git a/integrations/sources/citus-cdc.mdx b/integrations/sources/citus-cdc.mdx index 6e0d87cc..44a95929 100644 --- a/integrations/sources/citus-cdc.mdx +++ b/integrations/sources/citus-cdc.mdx @@ -65,7 +65,7 @@ WITH ( Unless specified otherwise, the fields listed are required. Note that the value of these parameters should be enclosed in single quotation marks. | Field | Notes | -| ---------------- | ------------------------------------------------------------------------------------ | +| :--------------- | :----------------------------------------------------------------------------------- | | hostname | Hostname of the coordinator node. | | port | Port number of the coordinator node. | | username | Username of the database. | diff --git a/integrations/sources/coreflux-broker.mdx b/integrations/sources/coreflux-broker.mdx index c75ae310..2333998a 100644 --- a/integrations/sources/coreflux-broker.mdx +++ b/integrations/sources/coreflux-broker.mdx @@ -54,7 +54,7 @@ You are now ready to connect to your Coreflux Cloud broker using various clients ## Ingest and process fata from the Coreflux Broker ### 1\. Create a RisingWave cluster -Create a RisingWave cluster in [RisingWave Cloud](https://cloud.risingwave.com/) using the free plan. For more information, refer to the [RisingWave Cloud documentation](https://docs.risingwave.com/cloud/manage-clusters/). +Create a RisingWave cluster in [RisingWave Cloud](https://cloud.risingwave.com/) using the free plan. For more information, refer to the [RisingWave Cloud documentation](/cloud/manage-projects). ### 2\. Create a source[](#2-create-a-source "Direct link to 2. Create a source") diff --git a/integrations/sources/emqx.md b/integrations/sources/emqx.mdx similarity index 86% rename from integrations/sources/emqx.md rename to integrations/sources/emqx.mdx index 048e0627..f7e56df8 100644 --- a/integrations/sources/emqx.md +++ b/integrations/sources/emqx.mdx @@ -18,7 +18,7 @@ This guide will walk you through creating an EMQX broker on **EMQX Cloud** and c Start by signing up for an [EMQX Cloud](https://accounts.emqx.com/signup?continue=https%3A%2F%2Fwww.emqx.com%2Fcn%2Fcloud) account. The platform offers a 14-day free trial to explore its services. -![Sign Up for EMQX Cloud](../images/emqx_sign_up_for_emqx_cloud.png) +![Sign Up for EMQX Cloud](/images/emqx_sign_up_for_emqx_cloud.png) *Sign up for EMQX Cloud.* @@ -26,19 +26,19 @@ Start by signing up for an [EMQX Cloud](https://accounts.emqx.com/signup?continu Once logged in to your EMQX Cloud account, go to the dashboard and click **New Deployment** to create a new EMQX broker. -![Create a New Deployment](../images/emqx_create_a_new_deployment.png) +![Create a New Deployment](/images/emqx_create_a_new_deployment.png) *Create a new deployment.* Select the **Serverless** plan to get a free EMQX broker, leave all other settings at their default values, and click **Deploy**. -![Deploy a Serverless EMQX Broker](../images/emqx_deploy_a_serverless_emqx_broker.png) +![Deploy a Serverless EMQX Broker](/images/emqx_deploy_a_serverless_emqx_broker.png) *Deploy a serverless EMQX broker.* Once deployed, your serverless EMQX broker is ready to use. -![Serverless EMQX Deployment Overview](../images/emqx_serverless_emqx_deployment_overview.png) +![Serverless EMQX Deployment Overview](/images/emqx_serverless_emqx_deployment_overview.png) *Serverless EMQX Cloud deployment.* @@ -46,13 +46,13 @@ Once deployed, your serverless EMQX broker is ready to use. 
To secure your broker, configure authentication and authorization. Go to the **Access Control** -> **Authentication** page and add a **username** and **password** for your clients. -![Add Username and Password](../images/emqx_add_username_and_password.png) +![Add Username and Password](/images/emqx_add_username_and_password.png) *Add username and password for authentication.* Next, assign permissions for the username, enabling **publish** and **subscribe** actions for your MQTT topics. -![Set Authorization Details](../images/emqx_set_authorization_details.png) +![Set Authorization Details](/images/emqx_set_authorization_details.png) *Set authorization details.* @@ -77,7 +77,7 @@ Your broker is now ready to receive data from devices on the shop floor. Below i ### 1. Create a RisingWave cluster -Set up a RisingWave cluster on [RisingWave Cloud](https://cloud.risingwave.com/) using the free plan. Detailed setup instructions are available in the [RisingWave Cloud Documentation](https://docs.risingwave.com/cloud/manage-clusters/). +Set up a RisingWave cluster on [RisingWave Cloud](https://cloud.risingwave.com/) using the free plan. Detailed setup instructions are available in the [RisingWave Cloud Documentation](/cloud/manage-projects). ### 2. Create a source in RisingWave @@ -118,7 +118,7 @@ SELECT * FROM shop_floor_machine_data LIMIT 5; Here’s an example of the output: -![Query Result from the Source Table](../images/emqx_query_result_from_the_source_table.png) +![Query Result from the Source Table](/images/emqx_query_result_from_the_source_table.png) *Query result from the source table.* diff --git a/integrations/sources/google-cloud-storage.mdx b/integrations/sources/google-cloud-storage.mdx index 70c3c284..a9258daa 100644 --- a/integrations/sources/google-cloud-storage.mdx +++ b/integrations/sources/google-cloud-storage.mdx @@ -9,7 +9,7 @@ sidebarTitle: Google Cloud Storage ```sql CREATE SOURCE [ IF NOT EXISTS ] source_name schema_definition -[INCLUDE { file | offset } [AS ]] +[INCLUDE { file | offset | payload } [AS ]] WITH ( connector = 'gcs', connector_parameter = 'value', ... @@ -31,30 +31,30 @@ FORMAT data_format ENCODE data_encode ( ### Connector parameters -| Field | Notes | | -| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | -| gcs.bucket\_name | Required. The name of the bucket the data source is stored in. | | -| gcs.credential | Optional. The base64 encoded credential key. This key is obtained from the GCS service account key JSON file, and should be encoded with base64\. To get this JSON file, refer to the [guides of GCS documentation](https://cloud.google.com/iam/docs/keys-create-delete#iam-service-account-keys-create-console). To encode it with base64, run the following command: cat \~/Downloads/rwc-byoc-test-464bdd851bce.json \| base64 -b 0 | pbcopy, and then paste the output as the value for this parameter. If this field is not specified, ADC (application default credentials) will be used. | -| gcs.service\_account | Optional. 
The service account of the target GCS source. If gcs.credential or ADC is not specified, the credentials will be derived from the service account. | | -| match\_pattern | Conditional. This field is used to find object keys in the bucket that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. | | -| compression\_format | Optional. This field specifies the compression format of the file being read. You can define compression\_format in the CREATE TABLE statement. When set to gzip or gz, the file reader reads all files with the .gz suffix. When set to None or not defined, the file reader will automatically read and decompress .gz and .gzip files. | | -| match\_pattern | Conditional. This field is used to find object keys in gcs.bucket\_name that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. | | +| Field | Notes | +| :--------- | :----------- | +| gcs.bucket\_name | Required. The name of the bucket the data source is stored in. | +| gcs.credential | Required. Base64-encoded credential key obtained from the GCS service account key JSON file. To get this JSON file, refer to the [guides of GCS documentation](https://cloud.google.com/iam/docs/keys-create-delete#iam-service-account-keys-create-console).
  • To encode it in base64, run the following command: `cat ~/Downloads/rwc-byoc-test-464bdd851bce.json \| base64 -b 0 \| pbcopy`, and then paste the output as the value for this parameter.
  • If this field is not specified, ADC (application default credentials) will be used.
| +| gcs.service\_account | Optional. The service account of the target GCS source. If gcs.credential or ADC is not specified, the credentials will be derived from the service account. | +| match\_pattern | Conditional. This field is used to find object keys in the bucket that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. | +| compression\_format | Optional. This field specifies the compression format of the file being read. You can define `compression_format` in the CREATE TABLE statement. When set to gzip or gz, the file reader reads all files with the `.gz` suffix. When set to None or not defined, the file reader will automatically read and decompress `.gz` and `.gzip` files. | +| refresh.interval.sec | Optional. Configure the time interval between operations of listing files. It determines the delay in discovering new files, with a default value of 60 seconds. | ### Other parameters | Field | Notes | -| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| :---------------- | :--------------------------------------------------------------------------------------------------------------------------------------- | | _data\_format_ | Supported data format: PLAIN. | | _data\_encode_ | Supported data encodes: CSV, JSON, PARQUET. | | _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is header. Accepted values: 'true', 'false'. Default: 'true'. | -| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is \\n; for CSV encode, the delimiter can be one of ,, ;, E'\\t'. | +| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is `\n`; for CSV encode, the delimiter can be one of `,`, `;`, `E'\t'`. | ### Additional columns | Field | Notes | -| -------- | --------------------------------------------------------------------------------------------------------------------------- | +| :------- | :-------------------------------------------------------------------------------------------------------------------------- | | _file_ | Optional. The column contains the file name where current record comes from. | -| _offset_ | Optional. The column contains the corresponding bytes offset (record offset for parquet files) where current message begins | +| _offset_ | Optional. The column contains the corresponding bytes offset (record offset for parquet files) where current message begins. | ## Loading order of GCS files @@ -100,6 +100,20 @@ WITH ( match_pattern = '%Ring%*.ndjson', ) FORMAT PLAIN ENCODE JSON; ``` + +Use the `payload` keyword to ingest JSON data when you are unsure of the exact schema beforehand. Instead of defining specific column names and types at the very beginning, you can load all JSON data first and then prune and filter the data during runtime. Check the example below: + +```sql +CREATE TABLE table_include_payload (v1 int, v2 varchar) +INCLUDE payload +WITH ( + connector = 'gcs', + gcs.bucket_name = 'example-bucket', + gcs.credential = 'your-base64-encoded-credential' +) FORMAT PLAIN ENCODE JSON; +``` +
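The `file` and `offset` columns described above can also be included alongside the regular columns, which helps trace each row back to the object it came from. The statement below is a sketch only; the bucket name and credential are placeholders:

```sql
-- Sketch: expose the source file name and byte offset as extra columns.
-- `example-bucket` and the credential value are placeholders, not real settings.
CREATE TABLE gcs_with_metadata (v1 int, v2 varchar)
INCLUDE file AS source_file
INCLUDE offset AS source_offset
WITH (
    connector = 'gcs',
    gcs.bucket_name = 'example-bucket',
    gcs.credential = 'your-base64-encoded-credential'
) FORMAT PLAIN ENCODE JSON;
```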
```sql diff --git a/integrations/sources/google-pub-sub.mdx b/integrations/sources/google-pub-sub.mdx index 75d8fcda..a03971a0 100644 --- a/integrations/sources/google-pub-sub.mdx +++ b/integrations/sources/google-pub-sub.mdx @@ -24,15 +24,15 @@ FORMAT data_format ENCODE data_encode ( ## Parameters | Field | Note | -| -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | pubsub.subscription | Required. Specifies where the Pub/Sub subscription to consume messages from. Pub/Sub is used to load-balance messages among all readers pulling from the same subscription, so one subscription (i.e., one source) can only be used for one materialized view (MV) that is shared between the actors of its fragment. Otherwise, different MVs on the same source will both receive part of the messages. | | pubsub.credentials | Required. A JSON string containing the service account credentials for authorization, see the [service-account credentials guide](https://developers.google.com/workspace/guides/create-credentials#create%5Fcredentials%5Ffor%5Fa%5Fservice%5Faccount). The provided account credential must have the pubsub.subscriber [role](https://cloud.google.com/pubsub/docs/access-control#pubsub.subscriber) and pubsub.viewer [role](https://cloud.google.com/pubsub/docs/access-control#pubsub.viewer). | -| pubsub.start\_offset.nanos | Optional. Cannot be set together with pubsub.start\_snapshot. Specifies a numeric timestamp in nanoseconds, ideally the publish timestamp of a message in the subscription. If present, the connector seeks the subscription to the timestamp and starts consuming from there. Note that the seek operation is subject to limitations based on the message retention policy of the subscription. | -| pubsub.start\_snapshot | Optional. Cannot be set together with pubsub.start\_offset.nanos. If present, the connector first seeks to the specified snapshot before starting consumption. | +| pubsub.start\_offset.nanos | Optional. Cannot be set together with `pubsub.start_snapshot`. Specifies a numeric timestamp in nanoseconds, ideally the publish timestamp of a message in the subscription. If present, the connector seeks the subscription to the timestamp and starts consuming from there. Note that the seek operation is subject to limitations based on the message retention policy of the subscription. | +| pubsub.start\_snapshot | Optional. Cannot be set together with `pubsub.start_offset.nanos`. If present, the connector first seeks to the specified snapshot before starting consumption. | | pubsub.parallelism | Optional. 
Specifies the number of parallel consumers to run for the subscription. If not specified, the default value is 1. | -**INFO** + We can only achieve at-least-once semantic for the Pub/Sub source rather than exactly once because the SDK cannot seek back to a specific message offset. diff --git a/integrations/sources/hivemq.md b/integrations/sources/hivemq.mdx similarity index 88% rename from integrations/sources/hivemq.md rename to integrations/sources/hivemq.mdx index 7de9cc6d..4a377f06 100644 --- a/integrations/sources/hivemq.md +++ b/integrations/sources/hivemq.mdx @@ -1,12 +1,7 @@ --- title: "Ingest data from Coreflux broker" -description: "You can ingest data from [HiveMQ](https://www.hivemq.com/)." sidebarTitle: HiveMQ --- ---- - - - You can ingest data from HiveMQ, a leading MQTT platform renowned for its reliability, scalability, and flexibility. HiveMQ extends the MQTT standard to provide a comprehensive IoT messaging solution, trusted by brands like Air France-KLM, BMW, Mercedes-Benz, and ZF. It is widely adopted across industries such as automotive, energy, logistics, and smart manufacturing. The core of HiveMQ is its high-performance, MQTT-compliant broker, ensuring fast and reliable data transmission. @@ -18,7 +13,7 @@ This section provides step-by-step instructions for creating a HiveMQ broker on To begin, sign up for a free trial of HiveMQ Cloud at [HiveMQ Cloud](http://console.hivemq.cloud/). This service offers cloud-based, enterprise-grade MQTT capabilities tailored for IoT messaging. -![HiveMQ Cloud Sign-Up](../images/hivemq_cloud_sign_up.png) +![HiveMQ Cloud Sign-Up](/images/hivemq_cloud_sign_up.png) *HiveMQ Cloud Sign-Up page.* @@ -29,13 +24,13 @@ After signing in, follow these steps to create a new HiveMQ cluster: 1. Select **Create New Cluster**. 2. Choose between **Serverless** and **Starter** cluster options. Select **Serverless** for a fast, easy setup. -![HiveMQ Cloud Cluster Creation](../images/hivemq_cloud_cluster_creation.png) +![HiveMQ Cloud Cluster Creation](/images/hivemq_cloud_cluster_creation.png) *HiveMQ Cloud Cluster Creation.* Once your **Serverless** HiveMQ cluster is set up, you can publish and subscribe to IoT events within minutes. -![HiveMQ Cloud Free Plans](../images/hivemq_cloud_free_plans.png) +![HiveMQ Cloud Free Plans](/images/hivemq_cloud_free_plans.png) *HiveMQ Cloud Free Plans: Serverless and Starter.* @@ -46,7 +41,7 @@ To securely connect and interact with the HiveMQ broker, you'll need to set up u - Add a **username** and **password**. - Assign the necessary permissions to **publish** and **subscribe** to the MQTT topics. -![HiveMQ Cluster Access Management](../images/hivemq_cluster_access_management.png) +![HiveMQ Cluster Access Management](/images/hivemq_cluster_access_management.png) *HiveMQ Cluster Access Management.* @@ -58,7 +53,7 @@ You'll now have access to your cluster details, which include: **Cluster name**, These details are essential for connecting to the HiveMQ broker using MQTT clients. -![HiveMQ Serverless Cluster Details](../images/hivemq_serverless_cluster_details.png) +![HiveMQ Serverless Cluster Details](/images/hivemq_serverless_cluster_details.png) *HiveMQ Serverless Cluster Details.* @@ -70,7 +65,7 @@ For detailed setup, refer to the [HiveMQ Quick Start Guide](https://docs.hivemq. To ingest data into RisingWave, you'll need to create a RisingWave cluster. Sign up for a free plan at [RisingWave Cloud](https://cloud.risingwave.com/) to explore its features. 
You can refer to the [RisingWave Documentation](https://docs.risingwave.com/docs/current/intro/) for comprehensive, step-by-step instructions. For further assistance or to join the community, connect with us on [Slack](https://www.risingwave.com/slack). -![RisingWave Cloud Sign-Up](../images/risingwave_cloud_sign_up.png) +![RisingWave Cloud Sign-Up](/images/risingwave_cloud_sign_up.png) *RisingWave Cloud Sign-Up page.* @@ -115,7 +110,7 @@ SELECT * FROM iot_sensor_data LIMIT 5; This query retrieves the top five records, providing a snapshot of the latest IoT data, including device IDs, timestamps, temperature, humidity, and status values. -![IoT Sensor Data Query Results](../images/IoT_sensor_data_query_results.png) +![IoT Sensor Data Query Results](/images/IoT_sensor_data_query_results.png) *Query results for IoT sensor data.* diff --git a/integrations/sources/kafka.mdx b/integrations/sources/kafka.mdx index d9d7ad81..b23983d1 100644 --- a/integrations/sources/kafka.mdx +++ b/integrations/sources/kafka.mdx @@ -13,17 +13,15 @@ RisingWave supports exactly-once semantics by reading transactional messages onl **GUIDED SETUP** -RisingWave Cloud provides an intuitive guided setup for creating a Kafka source. For more information, see [Create a source using guided setup](/cloud/manage-sources/#using-guided-setup) in the RisingWave Cloud documentation. +[RisingWave Cloud](https://cloud.risingwave.com/auth/signup/) provides an intuitive guided setup for creating a Kafka source. For more information, see [Create a source using guided setup](/cloud/manage-sources/#using-guided-setup) in the RisingWave Cloud documentation. -Sign up for RisingWave Cloud - ## Syntax ```sql CREATE {TABLE | SOURCE} [ IF NOT EXISTS ] source_name [ schema_definition ] -[INCLUDE { header | key | offset | partition | timestamp } [AS ]] +[INCLUDE { header | key | offset | partition | timestamp | payload } [AS ]] WITH ( connector='kafka', connector_parameter='value', ... @@ -43,56 +41,47 @@ FORMAT data_format ENCODE data_encode ( ) ``` - -**INFO** - For Avro and Protobuf data, do not specify `schema_definition` in the `CREATE SOURCE` statement. - - -**NOTE** - -RisingWave performs primary key constraint checks on tables but not on sources. If you need the checks to be performed, please create a table. +RisingWave performs primary key constraint checks on tables but not on sources. If you need the checks to be performed, please create a table. For tables with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record. -For tables with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record. +## Parameters - ### Connector parameters -| Field | Notes | -| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Field | Notes | +| :---------------------------- | :--------------------------------------------- | | topic | Required. Address of the Kafka topic. One source can only correspond to one topic. | -| properties.bootstrap.server | Required. 
Address of the Kafka broker. Format: 'ip:port,ip:port'. | -| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The two supported modes are earliest (earliest offset) and latest (latest offset). If not specified, the default value earliest will be used. | -| scan.startup.timestamp.millis | Optional. RisingWave will start to consume data from the specified UNIX timestamp (milliseconds). If this field is specified, the value for scan.startup.mode will be ignored. | -| group.id.prefix | Optional. Specify a custom group ID prefix for the source. The default prefix is rw-consumer. Each job (materialized view) will have a separate consumer group with a generated suffix in the group ID, so the format of the consumer group is {group_id_prefix}-{fragment_id}. This is used to monitor progress in external Kafka tools and for authorization purposes. RisingWave does not rely on committed offsets or join the consumer group. It only reports offsets to the group. | +| properties.bootstrap.server | Required. Address of the Kafka broker. Format: `ip:port,ip:port`. | +| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The two supported modes are `earliest` (read from low watermark) and `latest` (read from high watermark). If not specified, the default value `earliest` will be used. | +| scan.startup.timestamp.millis | Optional. RisingWave will start to consume data from the specified UNIX timestamp (milliseconds). If this field is specified, the value for `scan.startup.mode` will be ignored. | +| group.id.prefix | Optional. Specify a custom group ID prefix for the source. The default prefix is `rw-consumer`. Each job (materialized view) will have a separate consumer group with a generated suffix in the group ID, so the format of the consumer group is `{group_id_prefix}-{fragment_id}`. This is used to monitor progress in external Kafka tools and for authorization purposes. RisingWave does not rely on committed offsets or join the consumer group. It only reports offsets to the group. | | properties.sync.call.timeout | Optional. Specify the timeout. By default, the timeout is 5 seconds. | | properties.client.id | Optional. Client ID associated with the Kafka client. | ### Other parameters | Field | Notes | -| ------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------------ | :--------------------------- | | _data\_format_ | Data format. Supported formats: DEBEZIUM, MAXWELL, CANAL, UPSERT, PLAIN. | | _data\_encode_ | Data encode. Supported encodes: JSON, AVRO, PROTOBUF, CSV. | | _message_ | Message name of the main Message in schema definition. Required for Protobuf. | -| _location_ | Web location of the schema file in http://..., https://..., or S3://... format. This option is not supported for Avro data. For Protobuf data, you must specify either a schema location or a schema registry but not both. | -| _schema.registry_ | Confluent Schema Registry URL. Example: http://127.0.0.1:8081. For Avro data, you must specify a Confluent Schema Registry or an AWS Glue Schema Registry. For Protobuf data, you must specify either a schema location or a Confluent Schema Registry but not both. 
| +| _location_ | Web location of the schema file in `http://...`, `https://...`, or `S3://...` format.
  • This option is not supported for Avro data.
  • For Protobuf data, you must specify either a schema location or a schema registry but not both.
| +| _schema.registry_ | Confluent Schema Registry URL. Example: `http://127.0.0.1:8081`.
  • For Avro data, you must specify a Confluent Schema Registry or an AWS Glue Schema Registry.
  • For Protobuf data, you must specify either a schema location or a Confluent Schema Registry but not both.
| | _schema.registry.username_ | Conditional. User name for the schema registry. It must be specified with schema.registry.password. | | _schema.registry.password_ | Conditional. Password for the schema registry. It must be specified with schema.registry.username. | -| _schema.registry.name.strategy_ | Optional. Accepts topic\_name\_strategy (default), record\_name\_strategy, topic\_record\_name\_strategy. If it is set to either record\_name\_strategy or topic\_record\_name\_strategy, the message parameter must also be set. It can only be specified with _schema.registry_. | | _access\_key_ | Required if loading descriptors from S3\. The access key ID of AWS. | | _secret\_key_ | Required if loading descriptors from S3\. The secret access key of AWS. | | _region_ | Required if loading descriptors from S3\. The AWS service region. | | _arn_ | Optional. The Amazon Resource Name (ARN) of the role to assume. | | _external\_id_ | Optional. The [external](https://aws.amazon.com/blogs/security/how-to-use-external-id-when-granting-access-to-your-aws-resources/) id used to authorize access to third-party resources. | -## Additional Kafka parameters +### Additional Kafka parameters When creating a source in RisingWave, you can specify the following Kafka parameters. To set the parameter, add the RisingWave equivalent of the Kafka parameter under the `WITH options`. For an example of the usage of these parameters, see the JSON example. For additional details on these parameters, see the [Configuration properties](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md). | Kafka parameter name | RisingWave parameter name | Type | -| ------------------------------------- | ------------------------------------------------ | ------- | +| :------------------------------------ | :----------------------------------------------- | :------ | | enable.auto.commit | properties.enable.auto.commit | boolean | | enable.ssl.certificate.verification | properties.enable.ssl.certificate.verification | bool | | fetch.max.bytes | properties.fetch.max.bytes | int | @@ -104,25 +93,21 @@ When creating a source in RisingWave, you can specify the following Kafka parame | receive.message.max.bytes | properties.receive.message.max.bytes | int | | ssl.endpoint.identification.algorithm | properties.ssl.endpoint.identification.algorithm | str | - -**NOTE** - + Set `properties.ssl.endpoint.identification.algorithm` to `none` to bypass the verification of CA certificates and resolve SSL handshake failure. This parameter can be set to either `https` or `none`. By default, it is `https`. - +
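As a quick illustration of how these pass-through options are written, each librdkafka setting simply goes into the `WITH` clause under its RisingWave name. The sketch below uses a placeholder topic and broker address, not values from this guide:

```sql
-- Sketch: tuning a Kafka source with additional librdkafka options.
-- Topic and broker addresses are placeholders.
CREATE SOURCE kafka_source_tuned (v1 int, v2 varchar)
WITH (
    connector = 'kafka',
    topic = 'example_topic',
    properties.bootstrap.server = 'broker1:9092,broker2:9092',
    scan.startup.mode = 'earliest',
    -- additional Kafka parameters, passed through to librdkafka
    properties.fetch.max.bytes = '2097152',
    properties.ssl.endpoint.identification.algorithm = 'none'
) FORMAT PLAIN ENCODE JSON;
```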
### Specific parameters for Amazon MSK -There are some specific parameters for Amazon Managed Streaming for Apache Kafka (MSK), please see[Access MSK in RisingWave](/docs/current/connector-amazon-msk/#access-msk-in-risingwave) for more details. +There are some specific parameters for Amazon Managed Streaming for Apache Kafka (MSK), please see [Access MSK in RisingWave](/docs/current/connector-amazon-msk/#access-msk-in-risingwave) for more details. ## Examples[](#examples "Direct link to Examples") Here are examples of connecting RisingWave to a Kafka broker to read data from individual topics. - -**NOTE** - + RisingWave supports reading messages that have been compressed by [zstd](http://www.zstd.net/). Additional configurations are not required. - + @@ -171,6 +156,19 @@ WITH ( ) FORMAT PLAIN ENCODE JSON; ``` +Use the `payload` keyword to ingest JSON data when you are unsure of the exact schema beforehand. Instead of defining specific column names and types at the very beginning, you can load all JSON data first and then prune and filter the data during runtime. Check the example below: + +```sql +CREATE TABLE table_include_payload (v1 int, v2 varchar) +INCLUDE payload +WITH ( + connector = 'kafka', + topic = 'kafka_1_partition_topic', + properties.bootstrap.server = 'message_queue:29092', + scan.startup.mode = 'earliest' +) FORMAT PLAIN ENCODE JSON; +``` + The additional Kafka parameters `queued.min.messages` and `queued.max.messages.kbytes` are specified with `properties.queued.min.messages` and `properties.queued.max.messages.kbytes`, respectively, when creating the source. ```sql @@ -361,7 +359,7 @@ If your Kafka source service is located in a different VPC from RisingWave, use To create a Kafka source with a PrivateLink connection, in the WITH section of your `CREATE SOURCE` or `CREATE TABLE` statement, specify the following parameters. | Parameter | Notes | -| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | privatelink.targets | The PrivateLink targets that correspond to the Kafka brokers. The targets should be in JSON format. Note that each target listed corresponds to each broker specified in the properties.bootstrap.server field. If the order is incorrect, there will be connectivity issues. | | privatelink.endpoint | The DNS name of the VPC endpoint. If you're using RisingWave Cloud, you can find the auto-generated endpoint after you created a connection. See details in [Create a PrivateLink connection](/cloud/create-a-connection/#whats-next). | | connection.name | The name of the connection. This parameter should only be included if you are using a connection created with the [CREATE CONNECTION](/docs/current/sql-create-connection/) statement. Omit this parameter if you have provisioned a VPC endpoint using privatelink.endpoint (recommended). 
| @@ -419,18 +417,16 @@ You need to specify encryption and authentication parameters in the WITH section To read data encrypted with SSL without SASL authentication, specify these parameters in the WITH section of your `CREATE SOURCE` statement. | Parameter | Notes | -| ----------------------------------- | ----------- | +| :---------------------------------- | :---------- | | properties.security.protocol | Set to SSL. | | properties.ssl.ca.location | | | properties.ssl.certificate.location | | | properties.ssl.key.location | | | properties.ssl.key.password | | - -**NOTE** - + For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. - + Here is an example of creating a table encrypted with SSL without using SASL authentication. @@ -456,14 +452,13 @@ WITH ( | Parameter | Notes | -| ---------------------------- | ---------------------------------------------------------------------------------------------- | +| :--------------------------- | :--------------------------------------------------------------------------------------------- | | properties.security.protocol | For SASL/PLAIN without SSL, set to SASL\_PLAINTEXT. For SASL/PLAIN with SSL, set to SASL\_SSL. | | properties.sasl.mechanism | Set to PLAIN. | | properties.sasl.username | | | properties.sasl.password | | -**NOTE** For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. @@ -520,17 +515,15 @@ WITH ( | Parameter | Notes | -| ---------------------------- | ---------------------------------------------------------------------------------------------- | +| :--------------------------- | :--------------------------------------------------------------------------------------------- | | properties.security.protocol | For SASL/SCRAM without SSL, set to SASL\_PLAINTEXT. For SASL/SCRAM with SSL, set to SASL\_SSL. | | properties.sasl.mechanism | Set to SCRAM-SHA-256 or SCRAM-SHA-512 depending on the encryption method used. | | properties.sasl.username | | | properties.sasl.password | | - -**NOTE** - + For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. - + For SASL/SCRAM with SSL, you also need to include these SSL parameters: @@ -562,7 +555,7 @@ WITH ( | Parameter | Notes | -| ------------------------------------------------ | ---------------------------------------------------------------------------------- | +| :----------------------------------------------- | :--------------------------------------------------------------------------------- | | properties.security.protocol | Set to SASL\_PLAINTEXT, as RisingWave does not support using SASL/GSSAPI with SSL. | | properties.sasl.mechanism | Set to GSSAPI. 
| | properties.sasl.kerberos.service.name | | @@ -571,11 +564,9 @@ WITH ( | properties.sasl.kerberos.kinit.cmd | | | properties.sasl.kerberos.min.time.before.relogin | | - -**NOTE** - + For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. - + Here is an example of creating a source authenticated with SASL/GSSAPI without SSL encryption. @@ -602,23 +593,19 @@ WITH ( -**CAUTION** - The implementation of SASL/OAUTHBEARER in RisingWave validates only [unsecured client side tokens](https://docs.confluent.io/platform/current/kafka/authentication%5Fsasl/authentication%5Fsasl%5Foauth.html#unsecured-client-side-token-creation-options-for-sasl-oauthbearer), and does not support OpenID Connect (OIDC) authentication. Therefore, it should not be used in production environments. | Parameter | Notes | -| ---------------------------------- | ---------------------------------------------------------------------------------------------------------- | +| :--------------------------------- | :--------------------------------------------------------------------------------------------------------- | | properties.security.protocol | For SASL/OAUTHBEARER without SSL, set to SASL\_PLAINTEXT. For SASL/OAUTHBEARER with SSL, set to SASL\_SSL. | | properties.sasl.mechanism | Set to OAUTHBEARER. | | properties.sasl.oauthbearer.config | | - -**NOTE** - + For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the parameters in the list assumes all parameters start with `properties.` and therefore do not include this prefix. Also, due to the limitation of the SASL/OAUTHBEARER implementation, you only need to specify one OAUTHBEARER parameter: `properties.sasl.oauthbearer.config`. Other OAUTHBEARER parameters are not applicable. - + For SASL/OAUTHBEARER with SSL, you also need to include these SSL parameters: diff --git a/integrations/sources/kinesis.mdx b/integrations/sources/kinesis.mdx index 9672e572..7c9fe98a 100644 --- a/integrations/sources/kinesis.mdx +++ b/integrations/sources/kinesis.mdx @@ -11,7 +11,7 @@ When creating a source, you can choose to persist the data from the source in Ri ```sql CREATE {TABLE | SOURCE} [ IF NOT EXISTS ] source_name [ schema_definition ] -[INCLUDE { header | key | offset | partition | timestamp } [AS ]] +[INCLUDE { header | key | offset | partition | timestamp | payload } [AS ]] WITH ( connector='kinesis', connector_parameter='value', ... @@ -32,23 +32,19 @@ FORMAT data_format ENCODE data_encode ( ``` -**INFO** For Avro and Protobuf data, do not specify `schema_definition` in the `CREATE SOURCE` or `CREATE TABLE` statement. The schema should be provided in a Web location in the option `schema.location` in the `ENCODE` section. - -**NOTE** RisingWave performs primary key constraint checks on tables with connector settings but not on regular sources. If you need the checks to be performed, please create a table with connector settings. For a table with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record. 
- ### Connector parameters | Field | Notes | -| ----------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :---------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | stream | Required. Name of the stream. | | aws.region | Required. AWS service region. For example, US East (N. Virginia). | | endpoint | Optional. URL of the entry point for the AWS Kinesis service. | @@ -57,17 +53,17 @@ For a table with primary key constraints, if a new data record with an existing | aws.credentials.session\_token | Optional. The session token associated with the temporary security credentials. Using this field is not recommended as RisingWave contains long-running jobs and the token may expire. Creating a [new role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id%5Froles%5Fcommon-scenarios%5Faws-accounts.html) is preferred. | | aws.credentials.role.arn | Optional. The Amazon Resource Name (ARN) of the role to assume. | | aws.credentials.role.external\_id | Optional. The [external id](https://aws.amazon.com/blogs/security/how-to-use-external-id-when-granting-access-to-your-aws-resources/) used to authorize access to third-party resources. | -| scan.startup.mode | Optional. The startup mode for Kinesis consumer. Supported modes: earliest (starts from the earliest offset), latest (starts from the latest offset), and timestamp (starts from a specific timestamp, specified by scan.startup.timestamp.millis). The default mode is earliest. | +| scan.startup.mode | Optional. The startup mode for Kinesis consumer. Supported modes: `earliest` (corresponding to [starting position](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_StartingPosition.html) `TRIM_HORIZON`), `latest` (corresponding to [starting position](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_StartingPosition.html) `LATEST`), and `timestamp` (starts from a specific timestamp specified by `scan.startup.timestamp.millis`, corresponding to [starting position](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_StartingPosition.html) `AT_TIMESTAMP`). The default mode is `earliest`. | | scan.startup.timestamp.millis | Optional. This field specifies the timestamp, represented in i64, to start consuming from. | ### Other parameters | Field | Notes | -| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _data\_format_ | Supported formats: DEBEZIUM, MAXWELL, CANAL, UPSERT, PLAIN. | | _data\_encode_ | Supported encodes: JSON, AVRO, PROTOBUF, CSV, BYTES. 
| | _message_ | Message name of the main Message in schema definition. Required when data\_encode is PROTOBUF. | -| _location_ | Web location of the schema file in http://..., https://..., or S3://... format. Required when data\_encode is AVRO or PROTOBUF. Examples:https://\/risingwave/proto-simple-schema.protos3://risingwave-demo/schema-location | +| _location_ | Web location of the schema file in `http://...`, `https://...`, or `S3://...` format. Required when `data_encode` is `AVRO` or `PROTOBUF`.
Examples:`https://<example_host>/risingwave/proto-simple-schema.proto` `s3://risingwave-demo/schema-location` | ## Example @@ -109,6 +105,20 @@ WITH ( aws.credentials.secret_access_key = 'your_secret_key' ) FORMAT PLAIN ENCODE JSON; ``` + +Use the `payload` keyword to ingest JSON data when you are unsure of the exact schema beforehand. Instead of defining specific column names and types at the very beginning, you can load all JSON data first and then prune and filter the data during runtime. Check the example below: + +```sql +CREATE TABLE table_include_payload (v1 int, v2 varchar) +INCLUDE payload +WITH ( + connector = 'kinesis', + stream = 'example_stream', + aws.region = 'us-east-1', + aws.credentials.access_key_id = 'your_access_key', + aws.credentials.secret_access_key = 'your_secret_key', + scan.startup.mode = 'earliest' +) FORMAT PLAIN ENCODE JSON; +``` +
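To resume from a point in time rather than from the head of the stream, `scan.startup.mode` can be combined with `scan.startup.timestamp.millis` as described in the parameter table above. The sketch below uses placeholder stream, region, credential, and timestamp values:

```sql
-- Sketch: start consuming a Kinesis stream from a specific timestamp.
-- Stream name, region, credentials, and the timestamp are placeholders.
CREATE SOURCE kinesis_from_timestamp (v1 int, v2 varchar)
WITH (
    connector = 'kinesis',
    stream = 'example_stream',
    aws.region = 'us-east-1',
    aws.credentials.access_key_id = 'your_access_key',
    aws.credentials.secret_access_key = 'your_secret_key',
    scan.startup.mode = 'timestamp',
    scan.startup.timestamp.millis = '1700000000000'
) FORMAT PLAIN ENCODE JSON;
```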
```sql diff --git a/integrations/sources/mongodb-cdc.mdx b/integrations/sources/mongodb-cdc.mdx index 8d858516..746277d6 100644 --- a/integrations/sources/mongodb-cdc.mdx +++ b/integrations/sources/mongodb-cdc.mdx @@ -33,7 +33,7 @@ WITH ( Unless specified otherwise, the fields listed are required. Note that the value of these parameters should be enclosed in single quotation marks. | Field | Notes | -| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | mongodb.url | The [connection string](https://www.mongodb.com/docs/manual/reference/connection-string/) of MongoDB. | | collection.name | The collection or collections you want to ingest data from. Use the format db\_name.collection\_name to specify which database the collection is in. To ingest data from collections in different database, use a comma-separated list of regular expressions. | @@ -63,7 +63,7 @@ You can see the [INCLUDE clause](/docs/current/include-clause/) for more details ### Metadata options | Field | Notes | -| ---------------- | ------------------------------- | +| :--------------- | :------------------------------ | | database\_name | Name of the database. | | collection\_name | Name of the MongoDB collection. | diff --git a/integrations/sources/mqtt.mdx b/integrations/sources/mqtt.mdx index ea2a6c92..3ffc88af 100644 --- a/integrations/sources/mqtt.mdx +++ b/integrations/sources/mqtt.mdx @@ -54,16 +54,17 @@ FORMAT PLAIN ENCODE data_encode; -- Format options: plain (encode BYTES and JSON ### Parameters | Field | Notes | -| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| url | Required. The URL of the broker to connect to, e.g., tcp://localhost. Must be prefixed with tcp://, mqtt://, ssl://, or mqtts:// to denote the protocol. mqtts:// and ssl:// use native certificates if no CA is specified. | -| qos | Optional. The quality of service for publishing messages. Defaults to at\_most\_once. Options include at\_most\_once, at\_least\_once, or exactly\_once. | +| :----------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| url | Required. The URL of the broker to connect to, e.g., tcp://localhost. Must be prefixed with `tcp://`, `mqtt://`, `ssl://`, or `mqtts://` to denote the protocol. `mqtts://` and `ssl://` use native certificates if no CA is specified. | +| qos | Optional. The quality of service for publishing messages. Defaults to at\_most\_once. Options include `at_most_once`, `at_least_once`, or `exactly_once`. | | username | Optional. Username for the MQTT broker. | | password | Optional. Password for the MQTT broker. | | client\_prefix | Optional. Prefix for the MQTT client ID. 
Defaults to "risingwave". | -| clean\_start | Optional. Determines if all states from queues are removed when the client disconnects. If true, the broker clears all client states upon disconnect; if false, the broker retains the client state and resumes pending operations upon reconnection. | +| clean\_start | Optional. Determines if all states from queues are removed when the client disconnects.
  • If true, the broker clears all client states upon disconnect;
  • If false, the broker retains the client state and resumes pending operations upon reconnection.
| | inflight\_messages | Optional. Maximum number of inflight messages. Defaults to 100. | -| tls.client\_cert | Optional. Path to the client's certificate file (PEM) or a string with the certificate content. Required for client authentication. Can use fs:// prefix for file paths. | -| tls.client\_key | Optional. Path to the client's private key file (PEM) or a string with the private key content. Required for client authentication. Can use fs:// prefix for file paths. | +| max\_packet\_size | Optional. The maximum message size for the MQTT client. | +| tls.client\_cert | Optional. Path to the client's certificate file (PEM) or a string with the certificate content. Required for client authentication. Can use `fs://` prefix for file paths. | +| tls.client\_key | Optional. Path to the client's private key file (PEM) or a string with the private key content. Required for client authentication. Can use `fs://` prefix for file paths. | | topic | Required. The topic name to subscribe or publish to. Can include wildcard topics, e.g., /topic/#. | This SQL statement creates a table named `iot_sensor_data` with columns for device ID, timestamp, temperature, humidity, and device status. The table is configured to connect to an MQTT broker using the MQTT connector, with specific URL, topic, and quality of service (QoS) settings, the data is encoded as JSON. diff --git a/integrations/sources/mysql-cdc.mdx b/integrations/sources/mysql-cdc.mdx index ec741a2e..f1ea39f2 100644 --- a/integrations/sources/mysql-cdc.mdx +++ b/integrations/sources/mysql-cdc.mdx @@ -30,13 +30,13 @@ To use the MySQL CDC features, we need to create a MySQL user account with appro CREATE USER 'user'@'%' IDENTIFIED BY 'password'; ``` -1. Grant the appropriate privileges to the user. +2. Grant the appropriate privileges to the user. ```sql GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'user'@'%'; ``` -1. Finalize the privileges. +3. Finalize the privileges. ```sql FLUSH PRIVILEGES; @@ -57,7 +57,7 @@ SHOW VARIABLES LIKE 'log_bin'; +---------------+-------+ ``` -1. If it is `OFF`, configure your MySQL server configuration file, my.cnf, with the following properties described below. Restart your MySQL server to let the configurations take effect. +2. If it is `OFF`, configure your MySQL server configuration file, my.cnf, with the following properties described below. Restart your MySQL server to let the configurations take effect. ```sql server-id = 223344 @@ -67,7 +67,7 @@ binlog_row_image = FULL expire_logs_days = 10 ``` -1. Confirm your changes by checking the `log-bin` again. +3. Confirm your changes by checking the `log-bin` again. ```sql SHOW VARIABLES LIKE 'log_bin'; @@ -86,7 +86,7 @@ The configuration process is different for AWS RDS MySQL or Aurora (MySQL-Compat 1. Turn on binary logging and choose a non-zero value for the **Retention period**.![Set retention period to a nonzero value](/images/ret-period.png) 2. Create a parameter group for MySQL instances. We created a parameter group named MySQL-CDC for the instance that runs MySQL 5.7.x.![Create a parameter group](/images/parameter-group.png) 3. Click the MySQL-CDC parameter group to edit the values of **binlog\_format** to **ROW** and **binlog\_row\_image** to **full**.![Set binlog_format to row](/images/binlog-format.png) ![Set binlog_row_image to full](/images/binlog-row.png) -4. 
Modify your RDS instance and apply the modified parameter group to your database.![Select modify](/images/modify-RDS.png) ![Apply changes to database](/images/apply-to-databas.png) +4. Modify your RDS instance and apply the modified parameter group to your database.![Select modify](/images/modify-RDS.png) ![Apply changes to database](/images/apply-to-database.png) 5. Click **Continue** and choose **Apply immediately**. Finally, click **Modify DB instance** to save the changes. Remember to reboot your MySQL instance.![Save changes made to MySQL RDS instance](/images/save-changes.png) 6. Ensure your MySQL users can access the tables and replications. @@ -131,8 +131,8 @@ FROM source TABLE table_name; All the fields listed below are required. Note that the value of these parameters should be enclosed in single quotation marks. -| Field | Notes | -| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Field | Notes | +| :------------ | :------------ | | hostname | Hostname of the database. | | port | Port number of the database. | | username | Username of the database. | @@ -140,13 +140,14 @@ All the fields listed below are required. Note that the value of these parameter | database.name | Name of the database. Note that RisingWave cannot read data from a built-in MySQL database, such as mysql, sys, etc. | | table.name | Name of the table that you want to ingest data from. | | server.id | Required if creating a shared source. A numeric ID of the database client. It must be unique across all database processes that are running in the MySQL cluster. If not specified, RisingWave will generate a random ID. | -| ssl.mode | Optional. The ssl.mode parameter determines the level of SSL/TLS encryption for secure communication with MySQL. It accepts three values: disabled, preferred, and required. The default value is disabled. When set to required, it enforces TLS for establishing a connection. | +|auto.schema.change| Optional. Specify whether you want to enable replicating MySQL table schema change. Set `auto.schema.change = 'true'` to enable it.| +| ssl.mode | Optional. The ssl.mode parameter determines the level of SSL/TLS encryption for secure communication with MySQL. Accepted values are disabled, preferred, and required. The default value is disabled. When set to required, it enforces TLS for establishing a connection. | | transactional | Optional. Specify whether you want to enable transactions for the CDC table that you are about to create. By default, the value is 'true' for shared sources, and 'false' otherwise. This feature is also supported for shared CDC sources for multi-table transactions. For performance considerations, transactions involving changes to more than 4096 rows cannot be guaranteed. | The following fields are used when creating a CDC table. -| Field | Notes | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Field | Notes | +| :------------ | :------------ | | snapshot | Optional. 
If false, CDC backfill will be disabled and only upstream events that have occurred after the creation of the table will be consumed. This option can only be applied for tables created from a shared source. | | snapshot.interval | Optional. Specifies the barrier interval for buffering upstream events. The default value is 1. | | snapshot.batch\_size | Optional. Specifies the batch size of a snapshot read query from the upstream table. The default value is 1000. | @@ -171,7 +172,7 @@ SELECT * FROM t2 ORDER BY v1; You can see the [INCLUDE clause](/docs/current/include-clause/) for more details. -#### Debezium parameters +### Debezium parameters [Debezium v2.6 connector configuration properties](https://debezium.io/documentation/reference/2.6/connectors/mysql.html#mysql-advanced-connector-configuration-properties) can also be specified under the `WITH` clause when creating a table or shared source. Add the prefix `debezium.` to the connector property you want to include. @@ -199,7 +200,7 @@ Data is in Debezium JSON format. [Debezium](https://debezium.io) is a log-based Below are the metadata columns available for MySQL CDC. | Field | Notes | -| -------------- | --------------------- | +| :------------- | :-------------------- | | database\_name | Name of the database. | | schema\_name | Name of the schema. | | table\_name | Name of the table. | @@ -278,8 +279,8 @@ The following table shows the corresponding data type in RisingWave that should RisingWave data types marked with an asterisk indicate that while there is no corresponding RisingWave data type, the ingested data can still be consumed as the listed type. -| MySQL type | RisingWave type | -| ------------------------------------------------------------------------------------------------------------ | -------------------------- | +| MySQL type | RisingWave type | +| :---- | :------------------------- | | BOOLEAN, BOOL | BOOLEAN | | BIT(1) | BOOLEAN\* | | BIT(>1) | No support | @@ -314,12 +315,12 @@ RisingWave data types marked with an asterisk indicate that while there is no co | DATETIME\[(fsp)\] Optional fractional seconds precision (fsp: 0-6). If omitted, the default precision is 0. | TIMESTAMP | | NUMERIC\[(M\[,D\])\] | NUMERIC | | DECIMAL\[(M\[,D\])\] | NUMERIC | -| GEOMETRY, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, MULTIPOLYGON, GEOMETRYCOLLECTION | STRUCT | +| GEOMETRY, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, MULTIPOLYGON, GEOMETRYCOLLECTION | Not supported | Please be aware that the range of specific values varies among MySQL types and RisingWave types. Refer to the table below for detailed information. -| MySQL type | RisingWave type | MySQL range | RisingWave range | -| ---------- | --------------- | -------------------------------------------------------- | ------------------------------------------ | +| MySQL type | RisingWave type | MySQL range | RisingWave range | +| :--------- | :-------------- | :---------------- | :---------------------- | | TIME | TIME | \-838:59:59.000000 to 838:59:59.000000 | 00:00:00 to 23:59:59 | | DATE | DATE | 1000-01-01 to 9999-12-31 | 0001-01-01 to 9999-12-31 | | DATETIME | TIMESTAMP | 1000-01-01 00:00:00.000000 to 9999-12-31 23:59:59.49999 | 1973-03-03 09:46:40 to 5138-11-16 09:46:40 | @@ -362,14 +363,13 @@ CREATE TABLE {{ this }} ( **PREMIUM EDITION FEATURE** This feature is only available in the premium edition of RisingWave. The premium edition offers additional advanced features and capabilities beyond the free and community editions. 
If you have any questions about upgrading to the premium edition, please contact our sales team at [sales@risingwave-labs.com](mailto:sales@risingwave-labs.com). -
- + PUBLIC PREVIEW -This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). +This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](../../changelog/product-lifecycle#features-in-the-public-preview-stage). - + RisingWave supports automatically mapping the upstream table schema when creating a CDC table from a MySQL CDC source. Instead of defining columns individually, you can use `*` when creating a table to ingest all columns from the source table. Note that `*` cannot be used if other columns are specified in the table creation process. Below is an example to create a table that ingests all columns from the upstream table from the MySQL database: @@ -403,12 +403,11 @@ And this it the output of `DESCRIBE supplier;` **PREMIUM EDITION FEATURE** This feature is only available in the premium edition of RisingWave. The premium edition offers additional advanced features and capabilities beyond the free and community editions. If you have any questions about upgrading to the premium edition, please contact our sales team at [sales@risingwave-labs.com](mailto:sales@risingwave-labs.com). - **PUBLIC PREVIEW** -This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). +This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](../../changelog/product-lifecycle#features-in-the-public-preview-stage). RisingWave supports auto schema changes in MySQL CDC. It ensures that your RisingWave pipeline stays synchronized with any schema changes in the source database, reducing the need for manual updates and preventing inconsistencies. diff --git a/integrations/sources/nats-jetstream.mdx b/integrations/sources/nats-jetstream.mdx index ee54e9af..36edb509 100644 --- a/integrations/sources/nats-jetstream.mdx +++ b/integrations/sources/nats-jetstream.mdx @@ -11,7 +11,7 @@ sidebarTitle: NATS JetStream **PUBLIC PREVIEW** -This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. 
For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). +This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](../../changelog/product-lifecycle#features-in-the-public-preview-stage). ## Prerequisites @@ -36,18 +36,18 @@ WITH ( connector='nats', server_url=':', [ , ...] subject='[, + connect_mode='', username='', - password='' + password='', jwt=``, nkey=``, ... -- delivery parameters - scan.startup.mode=`startup_mode` - scan.startup.timestamp.millis='xxxxx', + scan.startup.mode=``, + scan.startup.timestamp.millis='xxxxx' ) FORMAT PLAIN ENCODE data_encode; ``` @@ -61,16 +61,12 @@ FORMAT PLAIN ENCODE data_encode; ) ``` - -**NOTE** - + RisingWave performs primary key constraint checks on tables with connector settings but not on regular sources. If you need the checks to be performed, please create a table with connector settings. For a table with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record. - - -**NOTE** + According to the [NATS documentation](https://docs.nats.io/running-a-nats-service/nats%5Fadmin/jetstream%5Fadmin/naming), stream names must adhere to subject naming rules as well as being friendly to the file system. Here are the recommended guidelines for stream names: @@ -80,22 +76,22 @@ According to the [NATS documentation](https://docs.nats.io/running-a-nats-servic * Keep the name length limited to 32 characters as the JetStream storage directories include the account, stream name, and consumer name. * Avoid using reserved file names like `NUL` or `LPT1`. * Be cautious of case sensitivity in file systems. To prevent collisions, ensure that stream or account names do not clash due to case differences. For example, `Foo` and `foo` would collide on Windows or macOS systems. - + ### Parameters | Field | Notes | -| -------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | server\_url | Required. URLs of the NATS JetStream server, in the format of _address_:_port_. If multiple addresses are specified, use commas to separate them. | | subject | Required. NATS subject that you want to ingest data from. To specify more than one subjects, use a comma. | | stream | Required. NATS stream that you want to ingest data from. | | connect\_mode | Required. Authentication mode for the connection. Allowed values: plain: No authentication. user\_and\_password: Use user name and password for authentication. 
For this option, username and password must be specified. credential: Use JSON Web Token (JWT) and NKeys for authentication. For this option, jwt and nkey must be specified. | | jwt and nkey | JWT and NKEY for authentication. For details, see [JWT](https://docs.nats.io/running-a-nats-service/configuration/securing%5Fnats/auth%5Fintro/jwt) and [NKeys](https://docs.nats.io/running-a-nats-service/configuration/securing%5Fnats/auth%5Fintro/nkey%5Fauth). | | username and password | Conditional. The client user name and password. Required when connect\_mode is user\_and\_password. | -| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The supported modes are: earliest: Consume data from the earliest offset.latest: Consume data from the latest offset.timestamp\_millis: Consume data from a particular UNIX timestamp, which is specified via scan.startup.timestamp.millis.If not specified, the default value earliest will be used. | -| scan.startup.timestamp.millis | Conditional. Required when scan.startup.mode is timestamp\_millis. RisingWave will start to consume data from | +| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The supported modes are:
  • `earliest`: Consume from the earliest available message, corresponding to the [deliver policy](https://docs.nats.io/nats-concepts/jetstream/consumers#deliverpolicy) `DeliverAll`.
  • `latest`: Consume from the next message onward, corresponding to the `DeliverNew` policy.
  • `timestamp`: Consume from a particular UNIX timestamp, specified via `scan.startup.timestamp.millis`, corresponding to the `DeliverByStartTime` policy.
If not specified, the default value `earliest` will be used. | +| scan.startup.timestamp.millis | Conditional. Required when scan.startup.mode is timestamp. RisingWave will start to consume data from the specified UNIX timestamp. | | data\_encode | Supported encodes: JSON, PROTOBUF, BYTES. | | consumer.deliver\_subject | Optional. Subject to deliver messages to. | -| consumer.durable\_name | Optional. Durable name for the consumer. | +| consumer.durable\_name | Required. Durable name for the consumer. | | consumer.name | Optional. Name of the consumer. | | consumer.description | Optional. Description of the consumer. | | consumer.deliver\_policy | Optional. Policy on how messages are delivered. | diff --git a/integrations/sources/postgresql-cdc.mdx b/integrations/sources/postgresql-cdc.mdx index 893a9aa3..86365be7 100644 --- a/integrations/sources/postgresql-cdc.mdx +++ b/integrations/sources/postgresql-cdc.mdx @@ -26,8 +26,13 @@ By default, it is `replica`. For CDC, you will need to set it to logical in the ALTER SYSTEM SET wal_level = logical; ``` Keep in mind that changing the `wal_level` requires a restart of the PostgreSQL instance and can affect database performance. -note + + + If you choose to create multiple CDC tables without using a shared source, be sure to set `max_wal_senders` to be greater than or equal to the number of synced tables. By default, `max_wal_senders` is 10. + + + 2. Assign `REPLICATION`, `LOGIN`,and `CREATEDB` role attributes to the user. For an existing user, run the following statement to assign the attributes: `ALTER USER REPLICATION LOGIN CREATEDB;` @@ -145,7 +150,7 @@ To check the progress of backfilling historical data, find the corresponding int Unless specified otherwise, the fields listed are required. Note that the value of these parameters should be enclosed in single quotation marks. | Field | Notes | -| ------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | hostname | Hostname of the database. | | port | Port number of the database. | | username | Username of the database. | @@ -154,19 +159,22 @@ Unless specified otherwise, the fields listed are required. Note that the value | schema.name | Optional. Name of the schema. By default, the value is public. | | table.name | Name of the table that you want to ingest data from. | | slot.name | Optional. The [replication slot](https://www.postgresql.org/docs/14/logicaldecoding-explanation.html#LOGICALDECODING-REPLICATION-SLOTS) for this PostgreSQL source. By default, a unique slot name will be randomly generated. Each source should have a unique slot name. Valid replication slot names must contain only lowercase letters, numbers, and underscores, and be no longer than 63 characters. | -| ssl.mode | Optional. 
The ssl.mode parameter determines the level of SSL/TLS encryption for secure communication with Postgres. It accepts three values: disabled, preferred, and required. The default value is disabled. When set to required, it enforces TLS for establishing a connection. | -| publication.name | Optional. Name of the publication. By default, the value is rw\_publication. For more information, see [Multiple CDC source tables](#multiple-cdc-source-tables). | -| publication.create.enable | Optional. By default, the value is 'true'. If publication.name does not exist and this value is 'true', a publication.name will be created. If publication.name does not exist and this value is 'false', an error will be returned. | -| transactional | Optional. Specify whether you want to enable transactions for the CDC table that you are about to create. By default, the value is 'true' for shared sources, and 'false' otherwise. This feature is also supported for shared CDC sources for multi-table transactions. For performance considerations, transactions involving changes to more than 4096 rows cannot be guaranteed. | - -note +|auto.schema.change| Optional. Specify whether you want to enable replicating Postgres table schema change.| +|ssl.mode| Optional. The `ssl.mode` parameter determines the level of SSL/TLS encryption for secure communication with Postgres. Accepted values are `disabled`, `preferred`, `required`, `verify-ca`, and `verify-full`. The default value is `disabled`.
  • When set to `required`, it enforces TLS for establishing a connection;
  • When set to `verify-ca`, it verifies that the server is trustworthy by checking the certificate chain up to the root certificate stored on the client;
  • When set to `verify-full`, it verifies the certificate and also ensures the server hostname matches the name in the certificate.
| +| ssl.root.cert | Optional. Specify the root certificate secret. You must [create secret](/operate/manage-secrets) first and then use it here.| +| publication.name | Optional. Name of the publication. By default, the value is `rw_publication`. | +| publication.create.enable | Optional. By default, the value is `true`. If publication.name does not exist and this value is `true`, a publication.name will be created. If publication.name does not exist and this value is `false`, an error will be returned. | +| transactional | Optional. Specify whether you want to enable transactions for the CDC table that you are about to create. By default, the value is `true` for shared sources, and `false` otherwise. This feature is also supported for shared CDC sources for multi-table transactions. For performance considerations, transactions involving changes to more than 4096 rows cannot be guaranteed. | + RisingWave implements CDC via PostgreSQL replication. Inspect the current progress via the [pg\_replication\_slots](https://www.postgresql.org/docs/14/view-pg-replication-slots.html) view. Remove inactive replication slots via [pg\_drop\_replication\_slot()](https://www.postgresql.org/docs/current/functions-admin.html#:~:text=pg%5Fdrop%5Freplication%5Fslot). RisingWave does not automatically drop inactive replication slots. You must do this manually to prevent WAL files from accumulating in the upstream PostgreSQL database. + + The following fields are used when creating a CDC table. | Field | Notes | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | snapshot | Optional. If false, CDC backfill will be disabled and only upstream events that have occurred after the creation of the table will be consumed. This option can only be applied for tables created from a shared source. | | snapshot.interval | Optional. Specifies the barrier interval for buffering upstream events. The default value is 1. | | snapshot.batch\_size | Optional. Specifies the batch size of a snapshot read query from the upstream table. The default value is 1000. | @@ -191,7 +199,7 @@ SELECT * FROM t2 ORDER BY v1; You can see the [INCLUDE clause](/docs/current/include-clause/) for more details. -#### Debezium parameters +### Debezium parameters [Debezium v2.6 connector configuration properties](https://debezium.io/documentation/reference/2.6/connectors/postgresql.html#postgresql-advanced-configuration-properties) can also be specified under the `WITH` clause when creating a table or shared source. Add the prefix `debezium.` to the connector property you want to include. @@ -218,7 +226,7 @@ Data is in Debezium JSON format. [Debezium](https://debezium.io) is a log-based Below are the metadata columns available for PostgreSQL CDC. | Field | Notes | -| -------------- | --------------------- | +| :------------- | :-------------------- | | database\_name | Name of the database. | | schema\_name | Name of the schema. | | table\_name | Name of the table. 
| @@ -286,14 +294,12 @@ The following table shows the corresponding data type in RisingWave that should RisingWave data types marked with an asterisk indicate that while there is no corresponding RisingWave data type, the ingested data can still be consumed as the listed type. - -**NOTE** - + RisingWave cannot correctly parse composite types from PostgreSQL as Debezium does not support composite types in PostgreSQL. - + | PostgreSQL type | RisingWave type | -| ---------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :--------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | BOOLEAN | BOOLEAN | | BIT(1) | BOOLEAN | | BIT( > 1) | No support | @@ -372,12 +378,11 @@ CREATE TABLE {{ this }} ( **PREMIUM EDITION FEATURE** This feature is only available in the premium edition of RisingWave. The premium edition offers additional advanced features and capabilities beyond the free and community editions. If you have any questions about upgrading to the premium edition, please contact our sales team at [sales@risingwave-labs.com](mailto:sales@risingwave-labs.com). - **PUBLIC PREVIEW** -This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). +This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](../../changelog/product-lifecycle#features-in-the-public-preview-stage). RisingWave supports automatically mapping the upstream table schema when creating a CDC table from a PostgreSQL CDC source. Instead of defining columns individually, you can use `*` when creating a table to ingest all columns from the source table. Note that `*` cannot be used if other columns are specified in the table creation process. @@ -407,6 +412,25 @@ And this it the output of `DESCRIBE supplier;` ``` +## Ingest data from a partitioned table + +**PUBLIC PREVIEW** + +This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. 
If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). + + +RisingWave supports ingesting data from a partitioned table. To configure a publication for your CDC stream, note that PostgreSQL, by default, creates publications with `publish_via_partition_root = false`. This setting causes replication slot events to contain separate events for each partition, rather than for the root partitioned table. + +If you need to read from the partitioned table, you should explicitly set this property to `TRUE` when creating a publication. Execute the following command in your upstream PostgreSQL database: + +```sql +CREATE PUBLICATION publication_name FOR table_name WITH (publish_via_partition_root = true); +``` + +If you let RisingWave create the publication, it will automatically set `publish_via_partition_root = true`. + +Please be aware that PostgreSQL does not support adding both a partitioned table and its individual partitions to the same publication; however, it does not generate an error if attempted. If you need to ingest data from both the root table and its partitions, you should create separate publications for each. Otherwise, you will not be able to read from the table partitions. Meanwhile, in RisingWave, you should create separate sources with dedicated publication names for the partitioned table and its partitions. + ## Monitor the progress of direct CDC To observe the progress of direct CDC for PostgreSQL, use the following methods: diff --git a/integrations/sources/pulsar.mdx b/integrations/sources/pulsar.mdx index c64828a3..0559ab16 100644 --- a/integrations/sources/pulsar.mdx +++ b/integrations/sources/pulsar.mdx @@ -7,7 +7,7 @@ sidebarTitle: Apache Pulsar **PUBLIC PREVIEW** -This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). +This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](../../changelog/product-lifecycle#features-in-the-public-preview-stage). When creating a source, you can choose to persist the data from the source in RisingWave by using `CREATE TABLE` instead of `CREATE SOURCE` and specifying the connection settings and data format. @@ -17,7 +17,7 @@ When creating a source, you can choose to persist the data from the source in Ri ```sql CREATE {TABLE | SOURCE} [ IF NOT EXISTS ] source_name [ schema_definition ] -[INCLUDE { header | key | offset | partition | timestamp } [AS ]] +[INCLUDE { header | key | offset | partition | timestamp | payload } [AS ]] WITH ( connector='pulsar', connector_parameter='value', ... 
@@ -38,43 +38,38 @@ FORMAT data_format ENCODE data_encode ( ``` -**INFO** For Avro and Protobuf data, do not specify `schema_definition` in the `CREATE SOURCE` or `CREATE TABLE` statement. The schema should be provided in a Web location in the option `schema.location` in `ENCODE properties` section. - -**NOTE** - RisingWave performs primary key constraint checks on tables with connector settings but not on regular sources. If you need the checks to be performed, please create a table with connector settings. For a table with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record. - ### Connector parameters | Field | Notes | -| ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | topic | Required. Address of the Pulsar topic. One source can only correspond to one topic. | -| service.url | Required. Address of the Pulsar service. Typically in the format pulsar:// or pulsar+ssl://\:\ | -| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The two supported modes are earliest (earliest offset) and latest (latest offset). If not specified, the default value earliest will be used. | +| service.url | Required. Address of the Pulsar service. Typically in the format `pulsar://` or `pulsar+ssl://:` | +| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The two supported modes are `earliest` (earliest offset) and `latest` (latest offset). If not specified, the default value `earliest` will be used. | | scan.startup.timestamp.millis. | Optional. RisingWave will start to consume data from the specified UNIX timestamp (milliseconds). | -| auth.token | Optional. A token for auth. If both auth.token and oauth are set, only oauth authorization is effective. | -| oauth.issuer.url | Optional. The issuer url for OAuth2\. This field must be filled if other oauth fields are specified. | -| oauth.credentials.url | Optional. The path for credential files, starts with file://. This field must be filled if other oauth fields are specified. | -| oauth.audience | Optional. The audience for OAuth2\. This field must be filled if other oauth fields are specified. | +| auth.token | Optional. A token for auth. If both `auth.token` and `oauth` are set, only `oauth` authorization is effective. | +| oauth.issuer.url | Optional. The issuer url for OAuth2\. This field must be filled if other `oauth` fields are specified. | +| oauth.credentials.url | Optional. The path for credential files, starts with `file://`. This field must be filled if other `oauth` fields are specified. | +| oauth.audience | Optional. The audience for OAuth2\. This field must be filled if other `oauth` fields are specified. | | oauth.scope | Optional. The scope for OAuth2. 
| ### Other parameters | Field | Notes | -| ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _data\_format_ | Supported formats: DEBEZIUM, MAXWELL, CANAL, UPSERT, PLAIN. | | _data\_encode_ | Supported encodes: JSON, AVRO, PROTOBUF, CSV, BYTES. | | _message_ | Message name of the main Message in schema definition. Required when data\_encode is PROTOBUF. | -| _location_ | Web location of the schema file in http://..., https://..., or S3://... format. Required when data\_encode is AVRO or PROTOBUF. Examples:https://\/risingwave/proto-simple-schema.protos3://risingwave-demo/schema-location | -| _aws.credentials.access\_key\_id_ | Optional. The AWS access key for loading from S3\. This field does not need to be filled if oauth.credentials.url is specified to a local path. | -| _aws.credentials.secret\_access\_key_ | Optional. The AWS secret access key for loading from S3\. This field does not need to be filled if oauth.credentials.url is specified to a local path. | +| _location_ | Web location of the schema file in `http://...`, `https://...`, or `S3://...` format. Required when `data_encode` is `AVRO` or `PROTOBUF`. Examples:`https://\/risingwave/proto-simple-schema.proto`,`s3://risingwave-demo/schema-location` | +| _aws.credentials.access\_key\_id_ | Optional. The AWS access key for loading from S3\. This field does not need to be filled if `oauth.credentials.url` is specified to a local path. | +| _aws.credentials.secret\_access\_key_ | Optional. The AWS secret access key for loading from S3\. This field does not need to be filled if `oauth.credentials.url` is specified to a local path. | | _region_ | Required if loading descriptors from S3\. The AWS service region. | | _aws.credentials.role.arn_ | Optional. The Amazon Resource Name (ARN) of the role to assume. | | _aws.credentials.role.external\_id_ | Optional. The [external](https://aws.amazon.com/blogs/security/how-to-use-external-id-when-granting-access-to-your-aws-resources/) id used to authorize access to third-party resources. | @@ -144,6 +139,19 @@ WITH ( scan.startup.mode='latest', scan.startup.timestamp.millis='140000000' ) FORMAT PLAIN ENCODE JSON; +``` + +Use the `payload` keyword to ingest JSON data when you are unsure of the exact schema beforehand. Instead of defining specific column names and types at the very beginning, you can load all JSON data first and then prune and filter the data during runtime. 
Check the example below: + +```sql +CREATE TABLE table_include_payload (v1 int, v2 varchar) +INCLUDE payload +WITH ( + connector = 'pulsar', + topic = 'pulsar_1_partition_topic', + properties.bootstrap.server = 'message_queue:29092', + scan.startup.mode = 'earliest' +) FORMAT PLAIN ENCODE JSON; ``` diff --git a/integrations/sources/s3.mdx b/integrations/sources/s3.mdx index 2952e285..4a1ff358 100644 --- a/integrations/sources/s3.mdx +++ b/integrations/sources/s3.mdx @@ -11,7 +11,7 @@ The S3 connector does not guarantee the sequential reading of files or complete ```sql CREATE SOURCE [ IF NOT EXISTS ] source_name schema_definition -[INCLUDE { file | offset } [AS ]] +[INCLUDE { file | offset | payload } [AS ]] WITH ( connector='s3', connector_parameter='value', ... @@ -23,7 +23,7 @@ FORMAT data_format ENCODE data_encode ( ``` -**INFO** + For CSV data, specify the delimiter in the `delimiter` option in `ENCODE properties`. @@ -40,32 +40,33 @@ For CSV data, specify the delimiter in the `delimiter` option in `ENCODE propert ## Parameters | Field | Notes | -| --------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | connector | Required. Support the s3 connector only. | | s3.region\_name | Required. The service region. | | s3.bucket\_name | Required. The name of the bucket the data source is stored in. | | s3.credentials.access | Required. This field indicates the access key ID of AWS. | | s3.credentials.secret | Required. This field indicates the secret access key of AWS. | | s3.endpoint\_url | Conditional. The host URL for an S3-compatible object storage server. This allows users to use a different server instead of the standard S3 server. | -| compression\_format | Optional. This field specifies the compression format of the file being read. You can define compression\_format in the CREATE TABLE statement. When set to gzip or gz, the file reader reads all files with the .gz suffix. When set to None or not defined, the file reader will automatically read and decompress .gz and .gzip files. | -| match\_pattern | Conditional. This field is used to find object keys in s3.bucket\_name that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. | +| compression\_format | Optional. This field specifies the compression format of the file being read. You can define `compression_format` in the CREATE TABLE statement. When set to gzip or gz, the file reader reads all files with the `.gz` suffix. When set to `None` or not defined, the file reader will automatically read and decompress `.gz` and `.gzip` files. | +| match\_pattern | Conditional. This field is used to find object keys in `s3.bucket_name` that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. | | s3.assume\_role | Optional. 
Specifies the ARN of an IAM role to assume when accessing S3\. It allows temporary, secure access to S3 resources without sharing long-term credentials. | +| refresh.interval.sec | Optional. Configure the time interval between operations of listing files. It determines the delay in discovering new files, with a default value of 60 seconds. | note Empty cells in CSV files will be parsed to `NULL`. | Field | Notes | -| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| :---------------- | :--------------------------------------------------------------------------------------------------------------------------------------- | | _data\_format_ | Supported data format: PLAIN. | | _data\_encode_ | Supported data encodes: CSV, JSON, PARQUET. | -| _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is header. Accepted values: 'true', 'false'. Default: 'true'. | -| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is \\n; for CSV encode, the delimiter can be one of ,, ;, E'\\t'. | +| _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is header. Accepted values: `true`, `false`. Default is `true`. | +| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is `\n`; for CSV encode, the delimiter can be one of `,`, `;`, `E'\t'`. | ### Additional columns | Field | Notes | -| -------- | --------------------------------------------------------------------------------------------------------------------------- | +| :------- | :-------------------------------------------------------------------------------------------------------------------------- | | _file_ | Optional. The column contains the file name where current record comes from. | | _offset_ | Optional. The column contains the corresponding bytes offset (record offset for parquet files) where current message begins | @@ -114,6 +115,20 @@ WITH ( ) FORMAT PLAIN ENCODE JSON; ``` + +Use the `payload` keyword to ingest JSON data when you are unsure of the exact schema beforehand. Instead of defining specific column names and types at the very beginning, you can load all JSON data first and then prune and filter the data during runtime. Check the example below: + +```sql +CREATE TABLE table_include_payload (v1 int, v2 varchar) +INCLUDE payload +WITH ( + connector = 's3', + topic = 's3_1_partition_topic', + properties.bootstrap.server = 'message_queue:29092', + scan.startup.mode = 'earliest' +) FORMAT PLAIN ENCODE JSON; +``` + ```sql @@ -179,12 +194,10 @@ Function signature file_scan(file_format, storage_type, s3_region, s3_access_key, s3_secret_key, file_location_or_directory) ``` - -**NOTE** - + When reading a directory of Parquet files, the schema will be based on the first Parquet file listed. Please ensure that all Parquet files in the directory have the same schema. 
- + For example, assume you have a Parquet file named `sales_data.parquet` that stores a company's sales data, containing the following fields: * `product_id`: Product ID diff --git a/integrations/sources/solace.mdx b/integrations/sources/solace.mdx new file mode 100644 index 00000000..9cd7c34f --- /dev/null +++ b/integrations/sources/solace.mdx @@ -0,0 +1,123 @@ +--- +title: Ingest data from Solace +sidebarTitle: Solace +--- + +You can ingest data from [Solace](https://solace.com/)’s PubSub+ Platform, a powerful event-driven streaming solution designed for real-time enterprises. + +It facilitates the design, deployment, integration, and management of event-driven architectures (EDAs) across hybrid, multi-cloud, and IoT environments. It enables seamless data exchange across legacy systems, SaaS applications, messaging services, databases, and AI agents, connecting them to a real-time event-driven layer. + +## Set up Solace + +To set up [Solace PubSub+](https://solace.com/try-it-now/) event broker, you can either choose the free Software version using Docker or Solace PubSub+ Cloud. + + + + + +## Scenario + +Consider this scenario: automating the process of notifying passengers that "online check-in is open" exactly 48 hours before their flight departure. Airlines need to handle continuous streams of flight and passenger data to send timely "Check-in Open" alerts to passengers who have opted in. The process begins 72 hours before departure, as flight and passenger data enter the system. Then, at 48 hours before departure, a notification is triggered for eligible passengers. + +The solution involves two key steps: + +1. **Event Stream Processing:** Continuous streams of flight and passenger data are received from the Departure Control System (DCS) via Solace. Each flight is tracked by a unique identifier, and each passenger by a unique Passenger Reference Number (PRN), enabling real-time processing in RisingWave. +2. **Notification Logic:** Notifications are sent only to passengers who have opted in. + +Below is the sample data of flight and passenger details. Solace topic: `passenger_full_details` + +```json +{ + "passenger_ref_number": "PRN026", + "flight_id": "LH6456", + "flight_number": "6456", + "carrier_code": "LH", + "flight_date": "2024-10-17", + "origin": "LHR", + "departure_time": "2024-10-17T04:40:00Z", + "contact_info": "john.garcia@gmail.com", + "opted_in": true + } +``` + +## Ingest data from Solace into RisingWave + +Create a RisingWave cluster in [RisingWave Cloud](https://cloud.risingwave.com/) using the free plan. See the [documentation of RisingWave Cloud](/cloud/manage-projects) for instructions. + + +Solace PubSub+ supports popular open protocols like AMQP, JMS, MQTT, REST, and WebSocket, and open APIs such as Paho and Qpid to enable interaction with the event broker. We will use the [RisingWave MQTT connector](/integrations/sources/mqtt) to read and write data from Solace. + +Once the RisingWave cluster is set up, navigate to the Workspace and connect to data streams by creating tables, materialized views, and sinks using SQL statements. + + +### Step 1: Create source table + +This query creates a table named `combined_passenger_flight_data` to store detailed passenger and flight information. The data is sourced from the Solace topic `passenger_full_details`, connected through the Solace broker, with the Quality of Service (QoS) level set to **at least once** and formatted as plain JSON. 
+ +```sql +CREATE TABLE combined_passenger_flight_data ( + flight_id VARCHAR, + flight_number VARCHAR, + carrier_code VARCHAR, + flight_date DATE, + origin VARCHAR, + passenger_ref_number VARCHAR, + departure_time TIMESTAMPTZ, + opted_in BOOLEAN, + contact_info VARCHAR +) +WITH ( + connector = 'mqtt', + topic = 'passenger_full_details', + url = 'ssl://xxxxxxxxxx:8883', + username='solace-cloud-client', + password='xxxxxxxxxxxx', + qos = 'at_least_once' +) FORMAT PLAIN ENCODE JSON; +``` + +### Step 2: Use materialized view to filter + +This query creates a materialized view named `checkin_open_notification` that selects flight and passenger information for those who opted in and have flights departing within 48 to 72 hours from the current time. + +```sql +CREATE MATERIALIZED VIEW checkin_open_notification AS +SELECT flight_id, passenger_ref_number, flight_number, carrier_code, departure_time, contact_info +FROM combined_passenger_flight_data +WHERE opted_in = TRUE + AND departure_time <= NOW() - INTERVAL '48 hours' + AND departure_time > NOW() - INTERVAL '72 hours'; +``` + +### Step 3: Query the materialized view + +The materialized view can be queried to retrieve the latest data from the source: + +```sql +SELECT * FROM checkin_open_notification LIMIT 5; +``` + +The table chart lists passengers who opted in for notifications and have flights departing soon, showing `flight_id`, `passenger_ref_number`, `flight_number`, `carrier_code`, `departure_time`, and `contact_info`. It highlights passengers with flights departing before 48 from now, indicating that `check-in` is open. + + + + + +### Step 4: Create a sink to send notifications + +This query creates a sink named `checkin_notifications_sink`, which streams data from the `checkin_open_notification` view to the Solace topic `checkin_open_notification`. The connection to the Solace server is established with at-least-once Quality of Service (QoS), and the data is formatted as plain JSON. The online check-in notification system then retrieves this information from the Solace topic to send notifications to the passengers. + +```sql +CREATE SINK checkin_notifications_sink +FROM checkin_open_notification +WITH ( + connector = 'mqtt', + topic = 'checkin_open_notification', + url = 'ssl://xxxxxxxxxx:8883', + username='solace-cloud-client', + password='xxxxxxxxxxxx', + qos = 'at_least_once' +) FORMAT PLAIN ENCODE JSON; +``` + +We have successfully created a source table to read data from the Solace with an MQTT source connector, built a materialized view (MV) for querying and performing real-time analytics on the data, and set up a sink to send processed data to a Solace topic using the MQTT sink connector for downstream systems to utilize. \ No newline at end of file diff --git a/integrations/sources/sql-server-cdc.mdx b/integrations/sources/sql-server-cdc.mdx index d7dfc4a6..6647339b 100644 --- a/integrations/sources/sql-server-cdc.mdx +++ b/integrations/sources/sql-server-cdc.mdx @@ -12,7 +12,7 @@ This feature is exclusive to RisingWave Premium Edition that offers advanced cap **PUBLIC PREVIEW** -This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). 
+This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](../../changelog/product-lifecycle#features-in-the-public-preview-stage). Change Data Capture (CDC) refers to the process of identifying and capturing data changes in a database, and then delivering the changes to a downstream service in real time. @@ -52,7 +52,6 @@ EXEC sys.sp_cdc_enable_table @source_schema = 'dbo', @source_name = 't1', @role_ Replace `dbo` with the schema name and `t1` with the table name. -**NOTE** SQL Server allows you to create multiple CDC tables for the same source table using different capture instance names (@capture\_instance). However, RisingWave currently supports only a single capture instance per table. If your table has only one capture instance, RisingWave will automatically use it to create a CDC table. However, if there are multiple capture instances, RisingWave will select one at random for CDC table creation. @@ -87,7 +86,7 @@ CREATE TABLE [ IF NOT EXISTS ] table_name ( WITH ( snapshot='true' ) -FROM source TABLE table_name; +FROM source TABLE sqlserver_table_name; ``` Although SQL Server is case-insensitive in most cases, to avoid potential issues, please ensure that the case of the schema names, table names, and column names in RisingWave and SQL Server is consistent. @@ -97,25 +96,25 @@ Although SQL Server is case-insensitive in most cases, to avoid potential issues Unless specified otherwise, the fields listed are required. Note that the value of these parameters should be enclosed in single quotation marks. | Field | Notes | -| ------------- | ---------------------------- | +| :------------ | :--------------------------- | | hostname | Hostname of the database. | | port | Port number of the database. | | username | Username of the database. | | password | Password of the database. | | database.name | Name of the database. | +|database.encrypt| Optional. Specify whether to enable SSL encryption. Currently, `trustServerCertificate` is enabled regardless of the value of `database.encrypt`. | +| sqlserver_table_name | The identifier of SQL Server table in the format of `database_name.schema_name.table_name`. | - -**NOTE** - + As noted earlier, RisingWave will use the available capture instance to create a CDC table. If multiple capture instances exist, RisingWave will randomly choose one. Specifying a particular capture instance is not supported. Additionally, unlike MySQL and PostgreSQL, the SQL Server CDC connector does not support transactional CDC, as doing so would compromise the freshness of CDC sources. For further details, refer to the [Debezium SQL Server CDC connector documentation](https://debezium.io/documentation/reference/2.6/connectors/sqlserver.html#sqlserver-transaction-metadata). - + The following fields are used when creating a CDC table. -| Field | Notes | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Field | Notes | +| :------------------- | :--------------------------- | | snapshot | Optional. 
If false, CDC backfill will be disabled and only upstream events that have occurred after the creation of the table will be consumed. This option can only be applied for tables created from a shared source. | | snapshot.interval | Optional. Specifies the barrier interval for buffering upstream events. The default value is 1. | | snapshot.batch\_size | Optional. Specifies the batch size of a snapshot read query from the upstream table. The default value is 1000. | @@ -135,7 +134,7 @@ SELECT * FROM t2 ORDER BY v1; 4 | dd | 2024-05-20 09:01:08+00:00 ``` -#### Debezium parameters +### Debezium parameters [Debezium v2.6 connector configuration properties](https://debezium.io/documentation/reference/2.6/connectors/sqlserver.html#sqlserver-advanced-connector-configuration-properties) can also be specified under the `WITH` clause when creating a table or shared source. Add the prefix `debezium.` to the connector property you want to include. @@ -162,7 +161,7 @@ Data is in Debezium JSON format. [Debezium](https://debezium.io) is a log-based Below are the metadata columns available for SQL Server CDC. | Field | Notes | -| -------------- | --------------------- | +| :------------- | :-------------------- | | database\_name | Name of the database. | | schema\_name | Name of the schema. | | table\_name | Name of the table. | @@ -181,7 +180,7 @@ CREATE TABLE person ( INCLUDE DATABASE_NAME as database_name INCLUDE SCHEMA_NAME as schema_name INCLUDE TABLE_NAME as table_name -FROM mssql_source TABLE 'dbo.person'; +FROM mssql_source TABLE 'mydb.dbo.person'; ``` ## Examples @@ -228,8 +227,8 @@ The following table shows the corresponding data type in RisingWave that should RisingWave data types marked with an asterisk indicate that while there is no corresponding RisingWave data type, the ingested data can still be consumed as the listed type. -| SQL Server type | RisingWave type | -| --------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | +| SQL Server type | RisingWave type | +| :------- | :--------------------- | | BIT | BOOLEAN | | TINYINT, SMALLINT | SMALLINT | | INT | INTEGER | diff --git a/integrations/visualization/beekeeper-studio.mdx b/integrations/visualization/beekeeper-studio.mdx index 26277cc7..eaf1b97c 100644 --- a/integrations/visualization/beekeeper-studio.mdx +++ b/integrations/visualization/beekeeper-studio.mdx @@ -3,11 +3,9 @@ title: "Connect Beekeeper Studio to RisingWave" sidebarTitle: Beekeeper Studio description: "Beekeeper Studio is a modern, easy to use SQL editor and database manager. It provides a graphical user interface, allowing you to efficiently query and manage PostgreSQL, MySQL, SQL Server and more. Since RisingWave is PostgreSQL-compatible, you can easily connect Beekeeper Studio to RisingWave." --- - -**NOTE** - + RisingWave only supports connecting the Beekeeper Studio Community edition. The Ultimate (commercial) edition is not officially tested with RisingWave and may contain bugs. Please report any issues with the Ultimate edition to the RisingWave team. 
- + ## Prerequisites diff --git a/integrations/visualization/grafana.mdx b/integrations/visualization/grafana.mdx index 5ec3c4b3..310d30cb 100644 --- a/integrations/visualization/grafana.mdx +++ b/integrations/visualization/grafana.mdx @@ -32,13 +32,11 @@ To install Grafana locally, see the [Install Grafana](https://grafana.com/docs/g - -**NOTE** - + If both RisingWave and Grafana are started locally, the host domain can be either **localhost:4566** or **127.0.0.1:4566**. If you are running Grafana Cloud, the host domain should be your computer’s public IP address. - + 5. Click **Save & test**. diff --git a/mint.json b/mint.json index c256b133..02053f7e 100644 --- a/mint.json +++ b/mint.json @@ -32,7 +32,7 @@ "style": "roundedRectangle" }, "primaryTab": { - "name": "RisingWave" + "name": "Guides" }, "feedback": { "suggestEdit": true, @@ -66,16 +66,16 @@ "url": "sql" }, { - "name": "Python", - "url": "python" + "name": "Python SDK", + "url": "python-sdk" }, { "name": "Integrations", "url": "integrations" }, { - "name": "SDKs", - "url": "sdks" + "name": "Client Libraries", + "url": "client-libraries" }, { "name": "Changelog", @@ -121,12 +121,13 @@ {"source": "/docs/current/supported-sources-and-formats", "destination": "/ingestion/supported-sources-and-formats"}, {"source": "/docs/current/modify-schemas", "destination": "/ingestion/change-data-capture-with-risingwave"}, {"source": "/docs/current/include-clause", "destination": "/ingestion/ingest-additional-fields-with-include-clause"}, - {"source": "/docs/current/ingest-from-cdc", "destination": "/integrations/sources/kafka"}, + {"source": "/docs/current/ingest-from-cdc", "destination": "/integrations/sources/postgresql-cdc"}, {"source": "/docs/current/ingest-from-kafka", "destination": "/integrations/sources/kafka"}, {"source": "/docs/current/ingest-from-pulsar", "destination": "/integrations/sources/pulsar"}, {"source": "/docs/current/ingest-from-kinesis", "destination": "/integrations/sources/kinesis"}, {"source": "/docs/current/ingest-from-google-pubsub", "destination": "/integrations/sources/google-pub-sub"}, {"source": "/docs/current/ingest-from-redpanda", "destination": "/integrations/sources/redpanda"}, + {"source": "/docs/current/ingest-from-nats", "destination": "/integrations/sources/nats-jetstream"}, {"source": "/docs/current/ingest-from-mqtt", "destination": "/integrations/sources/mqtt"}, {"source": "/docs/current/ingest-from-postgres-cdc", "destination": "/integrations/sources/postgresql-cdc"}, {"source": "/docs/current/ingest-from-mysql-cdc", "destination": "/integrations/sources/mysql-cdc"}, @@ -137,7 +138,7 @@ {"source": "/docs/current/ingest-from-s3", "destination": "/integrations/sources/s3"}, {"source": "/docs/current/ingest-from-azure-blob", "destination": "/integrations/sources/azure-blob"}, {"source": "/docs/current/ingest-from-gcs", "destination": "/integrations/sources/google-cloud-storage"}, - {"source": "/docs/current/ingest-from-datagen", "destination": "/integrations/sources/datagen"}, + {"source": "/docs/current/ingest-from-datagen", "destination": "/ingestion/generate-test-data"}, {"source": "/docs/current/confluent-kafka-source", "destination": "/integrations/sources/confluent-cloud"}, {"source": "/docs/current/connector-amazon-msk", "destination": "/integrations/sources/amazon-msk"}, {"source": "/docs/current/ingest-from-automq-kafka", "destination": "/integrations/sources/automq-kafka"}, @@ -217,6 +218,7 @@ {"source": "/docs/current/sql-flush", "destination": "/sql/commands/sql-flush"}, {"source": 
"/docs/current/sql-grant", "destination": "/sql/commands/sql-grant"}, {"source": "/docs/current/sql-insert", "destination": "/sql/commands/sql-insert"}, + {"source": "/docs/current/sql-identifiers", "destination": "/sql/identifiers"}, {"source": "/docs/current/sql-recover", "destination": "/sql/commands/sql-recover"}, {"source": "/docs/current/sql-revoke", "destination": "/sql/commands/sql-revoke"}, {"source": "/docs/current/sql-select", "destination": "/sql/commands/sql-select"}, @@ -384,19 +386,20 @@ ] }, { - "group": "Python", + "group": "Python SDK", "pages": [ - "python/python" + "python-sdk/intro" ] }, { - "group": "SDKs", + "group": "Client Libraries", "pages": [ - "sdks/overview", - "sdks/go", - "sdks/nodejs", - "sdks/ruby", - "sdks/java" + "client-libraries/overview", + "client-libraries/go", + "client-libraries/nodejs", + "client-libraries/python", + "client-libraries/ruby", + "client-libraries/java" ] }, { @@ -647,8 +650,10 @@ "integrations/sources/pulsar", "integrations/sources/kinesis", "integrations/sources/google-pub-sub", + "integrations/sources/mqtt", + "integrations/sources/nats-jetstream", "integrations/sources/redpanda", - "integrations/sources/mqtt" + "integrations/sources/solace" ] }, { @@ -695,7 +700,9 @@ { "group": "MQTT", "pages": [ - "integrations/sources/coreflux-broker" + "integrations/sources/coreflux-broker", + "integrations/sources/emqx", + "integrations/sources/hivemq" ] } ] @@ -758,7 +765,10 @@ { "group": "Object storages", "pages": [ - "integrations/destinations/azure-blob" + "integrations/destinations/aws-s3", + "integrations/destinations/azure-blob", + "integrations/destinations/google-cloud-storage", + "integrations/destinations/webhdfs" ] } ] @@ -810,7 +820,7 @@ "group": "Operate", "pages": [ "operate/monitor-risingwave-cluster", - "operate/view-statement-progress", + "operate/monitor-statement-progress", "operate/alter-streaming", "operate/view-configure-system-parameters", "operate/view-configure-runtime-parameters", @@ -1018,5 +1028,10 @@ "linkedin": "https://go.risingwave.com/linkedin", "slack": "https://go.risingwave.com/slack", "youtube": "https://go.risingwave.com/youtube" + }, + "analytics": { + "ga4": { + "measurementId": "G-VG98SVDEYE" + } } } \ No newline at end of file diff --git a/operate/access-control.mdx b/operate/access-control.mdx index d4f288ba..450010c5 100644 --- a/operate/access-control.mdx +++ b/operate/access-control.mdx @@ -60,7 +60,7 @@ ALTER USER user1 RENAME TO user001; See the table below for the privileges available in RisingWave and the corresponding object levels that they can apply to. | Privilege | Description | Object Level | -| --------- | ----------------------------------------------------- | -------------------------------- | +| :-------- | :---------------------------------------------------- | :------------------------------- | | SELECT | Permission to retrieve data from a relation object. | Table, Source, Materialized View | | INSERT | Permission to add new rows to a table. | Table | | UPDATE | Permission to modify existing data in a table. | Table | diff --git a/operate/cluster-limit.mdx b/operate/cluster-limit.mdx index 7b9512b5..f6715f1d 100644 --- a/operate/cluster-limit.mdx +++ b/operate/cluster-limit.mdx @@ -30,7 +30,7 @@ meta_actor_cnt_per_worker_parallelism_hard_limit = 400 ``` -**CAUTION** + Please be aware that once you bypass the check or increase the limits, the cluster could become overloaded, leading to issues with stability, availability, or performance. 
diff --git a/operate/dedicated-compute-node.mdx b/operate/dedicated-compute-node.mdx index 92dbe798..a837ded7 100644 --- a/operate/dedicated-compute-node.mdx +++ b/operate/dedicated-compute-node.mdx @@ -14,11 +14,9 @@ You need to restart the node to update the role. A role can be one of: * `serving`: Indicates that the compute node is read-only and executes batch queries only. * `streaming`: Indicates that the compute node is only available for streaming. - -**NOTE** - + In a production environment, it's advisable to use separate nodes for batch and streaming operations. The `both` mode, which allows a node to handle both batch and streaming queries, is more suited for testing scenarios. While it's possible to execute batch and streaming queries concurrently, it's recommended to avoid running resource-intensive batch and streaming queries at the same time. - + For specific changes required in the YAML file, see [Separate batch streaming modes](https://github.com/risingwavelabs/risingwave-operator/blob/main/docs/manifests/risingwave/advanced/separate-batch-streaming-modes.yaml). diff --git a/operate/manage-a-large-number-of-streaming-jobs.mdx b/operate/manage-a-large-number-of-streaming-jobs.mdx index ea894917..7809f40b 100644 --- a/operate/manage-a-large-number-of-streaming-jobs.mdx +++ b/operate/manage-a-large-number-of-streaming-jobs.mdx @@ -110,7 +110,7 @@ worker_id|count| To rebalance the actor, you can use the alter parallelism statement mentioned above after v1.7.0, and the actors will be distributed to different compute nodes automatically. -**CAUTION** + In some references, `/risingwave/bin/risingwave ctl scale horizon --include-workers all` is used to scale out all streaming jobs to avoid the skewed actor distribution. However, this approach may not be sufficient when dealing with a large number of streaming jobs, as it does not consider the `default_parallelism` parameter. diff --git a/operate/manage-secrets.mdx b/operate/manage-secrets.mdx index 481a1571..f2312589 100644 --- a/operate/manage-secrets.mdx +++ b/operate/manage-secrets.mdx @@ -23,7 +23,7 @@ This feature is exclusive to RisingWave Premium Edition that offers advanced cap **PUBLIC PREVIEW** -This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). +This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](../../changelog/product-lifecycle#features-in-the-public-preview-stage). ## Create secrets @@ -44,11 +44,9 @@ CREATE SECRET mysql_pwd WITH ( ) AS '123'; ``` - -**NOTE** - + Currently only the meta backend is supported. - + ## Use secrets diff --git a/operate/meta-backup.mdx b/operate/meta-backup.mdx index 06e02558..81dac66e 100644 --- a/operate/meta-backup.mdx +++ b/operate/meta-backup.mdx @@ -10,8 +10,6 @@ A meta snapshot is a backup of meta service's data at a specific point in time. 
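The `operate/manage-secrets.mdx` hunk above stops at the `## Use secrets` heading, so here is a minimal sketch of how a created secret is typically referenced from a connector's `WITH` clause instead of a plaintext password. The secret name `pg_pwd`, the source name, and the connection values are hypothetical, and the connector parameters mirror the PostgreSQL CDC fields listed earlier in this diff.

```sql
-- Hypothetical names throughout; the secret stands in for a plaintext password.
CREATE SECRET pg_pwd WITH (backend = 'meta') AS 'my_password';

CREATE SOURCE pg_cdc_source WITH (
    connector = 'postgres-cdc',
    hostname = '127.0.0.1',
    port = '5432',
    username = 'postgres',
    password = secret pg_pwd,  -- reference the secret instead of embedding the value
    database.name = 'mydb'
);
```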
Before you can create a meta snapshot, you need to set the `backup_storage_url` and `backup_storage_directory` system parameters prior to the first backup attempt. -**CAUTION** - Be careful not to set the `backup_storage_url` and `backup_storage_directory` when there are snapshots. However, it's not strictly forbidden. If you insist on doing so, please note the snapshots taken before the setting will all be invalidated and cannot be used in restoration anymore. @@ -63,11 +61,9 @@ Below are two separate methods to restore from a meta snapshot using SQL databas If the cluster has been using a SQL database as meta store backend, follow these steps to restore from a meta snapshot. 1. Shut down the meta service. - -**NOTE** - + This step is especially important because the meta backup and recovery process does not replicate SST files. It is not permitted for multiple clusters to run with the same SSTs set at any time, as this can corrupt the SST files. - + 2. Create a new meta store, i.e. a new SQL database instance. Note that this new SQL database instance must have the exact same tables defined as the original, but all tables should remain empty. To achieve this, you can optionally use the [schema migration tool](https://github.com/risingwavelabs/risingwave/tree/main/src/meta/model%5Fv2/migration) to create tables, then truncate those non-empty tables populated by the tool. 3. Restore the meta snapshot to the new meta store. @@ -106,11 +102,9 @@ Parameters to `risectl meta restore-meta` should be: If the cluster has been using etcd as meta store backend, follow these steps to restore from a meta snapshot. 1. Shut down the meta service. - -**NOTE** - + This step is especially important because the meta backup and recovery process does not replicate SST files. It is not permitted for multiple clusters to run with the same SSTs set at any time, as this can corrupt the SST files. - + 2. Create a new meta store, i.e. a new and empty etcd instance. 3. Restore the meta snapshot to the new meta store. @@ -179,7 +173,5 @@ SET QUERY_EPOCH=0; ``` -**LIMITATION** - RisingWave only supports historical data access at a specific point in time backed up by at least one meta snapshot. diff --git a/operate/view-statement-progress.mdx b/operate/monitor-statement-progress.mdx similarity index 100% rename from operate/view-statement-progress.mdx rename to operate/monitor-statement-progress.mdx diff --git a/operate/secure-connections-with-ssl-tls.mdx b/operate/secure-connections-with-ssl-tls.mdx index edb0479a..983e5473 100644 --- a/operate/secure-connections-with-ssl-tls.mdx +++ b/operate/secure-connections-with-ssl-tls.mdx @@ -26,11 +26,9 @@ SSL connection (protocol: TLSv1.3, cipher: TLS_AES_256_GCM_SHA384, bits: 256, co ## Optional: Create a self-signed certificate for testing purposes - -**NOTE** - + While a self-signed certificate is suitable for testing, it is recommended to obtain a certificate from a Certificate Authority (CA) for production environments. - + To create a simple self-signed certificate for the server, valid for 365 days, for testing purposes, use the OpenSSL command below. Replace `localhost` with the desired Common Name (CN). 
```bash diff --git a/operate/view-configure-runtime-parameters.mdx b/operate/view-configure-runtime-parameters.mdx index a55b0f98..e7411165 100644 --- a/operate/view-configure-runtime-parameters.mdx +++ b/operate/view-configure-runtime-parameters.mdx @@ -27,10 +27,10 @@ For example, you may see a table similar to this: Below is the detailed information about the parameters you may see after using the `SHOW ALL` command: -| Name | Values or value examples | Description | -| -------------------------------------------- | ------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| rw\_implicit\_flush | true/false | If RW\_IMPLICIT\_FLUSH is on, then every INSERT/UPDATE/DELETE statement will block until the entire dataflow is refreshed. In other words, every related table & MV will be able to see the write. | -| create\_compaction\_group\_for\_mv | true/false | If CREATE\_COMPACTION\_GROUP\_FOR\_MV is on, dedicated compaction groups will be created in MV creation. | +| Name | Values or examples | Description | +| :--- | :--- | :--- | +| rw\_implicit\_flush | true/false | If `RW_IMPLICIT_FLUSH` is on, then every INSERT/UPDATE/DELETE statement will block until the entire dataflow is refreshed. In other words, every related table & MV will be able to see the write. | +| create\_compaction\_group\_for\_mv | true/false | If `CREATE_COMPACTION_GROUP_FOR_MV` is on, dedicated compaction groups will be created in MV creation. | | query\_mode | auto | A temporary config variable to force query running in either local or distributed mode. The default value is auto which means let the system decide to run batch queries in local or distributed mode automatically. | | extra\_float\_digits | 1 | Set the number of digits displayed for floating-point values. See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#:~:text=for%20more%20information.-,extra%5Ffloat%5Fdigits,-%28integer%29) for details. | | application\_name | psql | Set the application name to be reported in statistics and logs. See [here](https://www.postgresql.org/docs/14/runtime-config-logging.html#:~:text=What%20to%20Log-,application%5Fname,-%28string%29) for details. | @@ -40,39 +40,39 @@ Below is the detailed information about the parameters you may see after using t | batch\_enable\_distributed\_dml | true/false | Enable distributed DML, allowing INSERT/UPDATE/DELETE statements to be executed in a distributed way, such as running on multiple compute nodes. Defaults to false. | | max\_split\_range\_gap | 8 | The max gap allowed to transform small range scan into multi point lookup. 
| | search\_path | "$user", public | Set the order in which schemas are searched when an object (table, data type, function, etc.) is referenced by a simple name with no schema specified. See [here](https://www.postgresql.org/docs/14/runtime-config-client.html#GUC-SEARCH-PATH) for details. | -| visibility\_mode | default | If VISIBILITY\_MODE is all, we will support querying data without checkpoint. | +| visibility\_mode | default | If `VISIBILITY_MODE` is `all`, we will support querying the latest uncommitted data, and consistency is not guaranteed between the tables. | | transaction\_isolation | read committed | See [here](https://www.postgresql.org/docs/current/transaction-iso.html) for details. | | query\_epoch | 0 | Select as of specific epoch. Sets the historical epoch for querying data. If 0, querying latest data. | | timezone | UTC | Session timezone. Defaults to UTC. | -| streaming\_parallelism | ADAPTIVE/0,1,2,... | If STREAMING\_PARALLELISM is non-zero, CREATE MATERIALIZED VIEW/TABLE/INDEX will use it as streaming parallelism. | +| streaming\_parallelism | ADAPTIVE/0,1,2,... | If `STREAMING_PARALLELISM` is non-zero, CREATE MATERIALIZED VIEW/TABLE/INDEX will use it as streaming parallelism. | | rw\_streaming\_enable\_delta\_join | true/false | Enable delta join for streaming queries. Defaults to false. | | rw\_streaming\_enable\_bushy\_join | true/false | Enable bushy join for streaming queries. Defaults to true. | | streaming\_use\_arrangement\_backfill | true/false | Enable arrangement backfill for streaming queries. Defaults to true. | | streaming\_use\_snapshot\_backfill | true/false | Enable snapshot backfill for streaming queries. Defaults to false. | | rw\_enable\_join\_ordering | true/false | Enable join ordering for streaming and batch queries. Defaults to true. | -| rw\_enable\_two\_phase\_agg | true/false | Enable two phase agg optimization. Defaults to true. Setting this to true will always set FORCE\_TWO\_PHASE\_AGG to false. | -| rw\_force\_two\_phase\_agg | true/false | Force two phase agg optimization whenever there's a choice between optimizations. Defaults to false. Setting this to true will always set ENABLE\_TWO\_PHASE\_AGG to false. | +| rw\_enable\_two\_phase\_agg | true/false | Enable two phase agg optimization. Defaults to true. Setting this to true will always set `FORCE_TWO_PHASE_AGG` to false. | +| rw\_force\_two\_phase\_agg | true/false | Force two phase agg optimization whenever there's a choice between optimizations. Defaults to false. Setting this to true will always set `ENABLE_TWO_PHASE_AGG` to false. | | rw\_enable\_share\_plan | true/false | Enable sharing of common sub-plans. This means that DAG structured query plans can be constructed, rather than only tree structured query plans. | | rw\_force\_split\_distinct\_agg | true/false | Enable split distinct agg. | | intervalstyle | postgres | Set the display format for interval values. It is typically set by an application upon connection to the server. See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-INTERVALSTYLE) for details. | -| batch\_parallelism | 0 | If BATCH\_PARALLELISM is non-zero, batch queries will use this parallelism. | +| batch\_parallelism | 0 | If `BATCH_PARALLELISM` is non-zero, batch queries will use this parallelism. | | server\_version | 9.5.0 | The version of PostgreSQL that Risingwave claims to be. | | server\_version\_num | 90500 | The version of PostgreSQL that Risingwave claims to be. 
| | client\_min\_messages | notice | See [here](https://www.postgresql.org/docs/15/runtime-config-client.html#GUC-CLIENT-MIN-MESSAGES) for details. | | client\_encoding | UTF8 | See [here](https://www.postgresql.org/docs/15/runtime-config-client.html#GUC-CLIENT-ENCODING) for details. | | sink\_decouple | default | Enable decoupling sink and internal streaming graph or not. | | synchronize\_seqscans | true/false | See [here](https://www.postgresql.org/docs/current/runtime-config-compatible.html#RUNTIME-CONFIG-COMPATIBLE-VERSION) for details. Unused in RisingWave, support for compatibility. | -| statement\_timeout | 3600 | Abort query statement that takes more than the specified amount of time in sec. If log\_min\_error\_statement is set to ERROR or lower, the statement that timed out will also be logged. The default value is 1 hour. | +| statement\_timeout | 3600 | Abort query statement that takes more than the specified amount of time in sec. If `log_min_error_statement` is set to ERROR or lower, the statement that timed out will also be logged. The default value is 1 hour. | | lock\_timeout | 0 | See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-LOCK-TIMEOUT) for details. Unused in RisingWave, support for compatibility. | | cdc\_source\_wait\_streaming\_start\_timeout | 30 | For limiting the startup time of a shareable CDC streaming source when the source is being created. Unit: seconds. | | row\_security | true/false | See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-ROW-SECURITY) for details. Unused in RisingWave, support for compatibility. | | standard\_conforming\_strings | on | See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-STANDARD-CONFORMING-STRINGS) for details. | -| source\_rate\_limit | default/ A positive integer / 0 | Set the maximum number of records per second per source, for each parallelism. The source here refers to an upstream source. This parameter is applied to tables and tables with sources. The value can be default, 0, or a positive integer. SET SOURCE\_RATE\_LIMIT TO 0 will pause the source read for sources. SET SOURCE\_RATE\_LIMIT TO DEFAULT will disable the rate limit within the session, but it will not change the rate limits of existing DDLs.Note that the total throughput of a streaming job is determined by multiplying the parallelism with the throttle rate. To obtain the parallelism value for a streaming job, you can refer to the streaming\_parallelism runtime parameter in this table. Additionally, we support altering rate limits in [sources](/docs/current/sql-alter-source/#set-source-rate-limit) and [tables that have source](/docs/current/sql-alter-table/#set-source-rate-limit). | -| backfill\_rate\_limit | default/ A positive integer / 0 | Set the maximum number of records per second per parallelism for the backfill process of materialized views, sinks, and indexes. This parameter throttles the snapshot read stream for backfill. The value can be default, 0, or a positive integer. SET BACKFILL\_RATE\_LIMIT TO 0 will pause the snapshot read stream for backfill. SET BACKFILL\_RATE\_LIMIT TO default will disable the backfill rate limit within the session, but it will not change the backfill rate limit of existing DDLs. To obtain the parallelism value for a streaming job, you can refer to the streaming\_parallelism runtime parameter in this table. 
Additionally, we support altering backfill rate limits in [materialized views](/docs/current/sql-alter-materialized-view/#set-backfill%5Frate%5Flimit) and [CDC tables](/docs/current/sql-alter-table/#set-backfill%5Frate%5Flimit). | -| rw\_streaming\_over\_window\_cache\_policy | full | Cache policy for partition cache in streaming over window. Can be "full", "recent", "recent\_first\_n" or "recent\_last\_n". | +| source\_rate\_limit | default/positive integer/0 | Set the maximum number of records per second per source, for each parallelism. This parameter is applied when creating new sources and tables with sources.

The value can be default, 0, or a positive integer.
  • SET `SOURCE_RATE_LIMIT` TO 0 will pause the source read for sources.
  • SET `SOURCE_RATE_LIMIT` TO DEFAULT will remove the rate limit.
Setting this variable will only affect new DDLs within the session, but will not change the rate limits of existing jobs. Use `ALTER` to change the rate limits in existing [sources](/sql/commands/sql-alter-source/#set-source-rate-limit) and [tables that have a source](/sql/commands/sql-alter-table/#set-source-rate-limit).

Note that the total throughput of a streaming job is determined by multiplying the parallelism with the throttle rate. To obtain the parallelism value for a streaming job, you can refer to the `streaming_parallelism` runtime parameter in this table. | +| backfill\_rate\_limit | default/positive integer/0 | Set the maximum number of records per second per parallelism for the backfill process of materialized views, sinks, and indexes. This parameter is applied when creating new jobs, and throttles the backfill from upstream materialized views and sources.

The value can be default, 0, or a positive integer.
  • SET `BACKFILL_RATE_LIMIT` TO 0 will pause the backfill.
  • SET `BACKFILL_RATE_LIMIT` TO DEFAULT will remove the backfill rate limit.

Setting this variable will only affect new DDLs within the session, but will not change the rate limits of existing jobs. Use `ALTER` to change the backfill rate limits in existing [materialized views](/sql/commands/sql-alter-materialized-view/#set-backfill%5Frate%5Flimit) and [CDC tables](/sql/commands/sql-alter-table/#set-backfill%5Frate%5Flimit).

Note that the total throughput of a streaming job is determined by multiplying the parallelism with the throttle rate. To obtain the parallelism value for a streaming job, you can refer to the `streaming_parallelism` runtime parameter in this table. | +| rw\_streaming\_over\_window\_cache\_policy | full | Cache policy for partition cache in streaming over window. Can be `full`, `recent`, `recent_first_n` or `recent_last_n`. | | background\_ddl | true/false | Run DDL statements in background. | | server\_encoding | UTF8 | Show the server-side character set encoding. At present, this parameter can be shown but not set, because the encoding is determined at database creation time. | -| bytea\_output | hex | Set the output format for values of type bytea. Valid values are hex (the default) and escape (the traditional PostgreSQL format). See Section 8.4 for more information. The bytea type always accepts both formats on input, regardless of this setting. | +| bytea\_output | hex | Set the output format for values of type bytea. Valid values are hex (the default) and escape (the traditional PostgreSQL format). The bytea type always accepts both formats on input, regardless of this setting. | If you just want to view a specific parameter's value, you can also use the `SHOW` command. diff --git a/operate/view-configure-system-parameters.mdx b/operate/view-configure-system-parameters.mdx index bf51312b..9ca1cef4 100644 --- a/operate/view-configure-system-parameters.mdx +++ b/operate/view-configure-system-parameters.mdx @@ -9,11 +9,11 @@ System parameters in RisingWave refer to the parameters that advanced users can Currently, these system parameters are available in RisingWave. | Parameter | Description | -| ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :----------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | barrier\_interval\_ms | The time interval of the periodic barriers. | | checkpoint\_frequency | Specify the number of barriers for which a checkpoint will be created. The value must be a positive integer. | | sstable\_size\_mb | The target size of SSTable. | -| parallel\_compact\_size\_mb | This parameter, together with max\_sub\_compaction, controls the concurrency of individual tasks. 
If the data size is smaller than parallel\_compact\_size\_mb, only a single thread is used to execute the compaction task. If the data size of an individual task exceeds parallel\_compact\_size\_mb, multiple concurrent threads are started to complete the task. At this time, if the data size is N, then the total number of these threads is determined by dividing N by parallel\_compact\_size\_mb. Additionally, each sub-task's size cannot exceed parallel\_compact\_size\_mb, and the total number of sub-tasks cannot exceed max\_sub\_compaction. | +| parallel\_compact\_size\_mb | This parameter, together with `max_sub_compaction`, controls the concurrency of individual tasks.
  • If the data size is smaller than `parallel_compact_size_mb`, only a single thread is used to execute the compaction task.
  • If the data size of an individual task exceeds `parallel_compact_size_mb`, multiple concurrent threads are started to complete the task. At this time, if the data size is N, then the total number of these threads is determined by dividing N by `parallel_compact_size_mb`.
Additionally, each sub-task's size cannot exceed `parallel_compact_size_mb`, and the total number of sub-tasks cannot exceed `max_sub_compaction`. | | block\_size\_kb | The size of each block in bytes in SSTable. | | bloom\_false\_positive | False positive rate of bloom filter in SSTable. | | state\_store | The state store URL. | @@ -22,7 +22,7 @@ Currently, these system parameters are available in RisingWave. | backup\_storage\_directory | The directory of the remote storage for backups. | | telemetry\_enabled | Whether to enable telemetry or not. For more information, see [Telemetry](/docs/current/telemetry/). | | max\_concurrent\_creating\_streaming\_jobs | The maximum number of streaming jobs that can be created concurrently. That is, the maximum of materialized views, indexes, tables, sinks, or sources that can be created concurrently. | -| pause\_on\_next\_bootstrap | This parameter is used for debugging and maintenance purposes. Setting it to true will pause all data sources, such as connectors and DMLs, when the cluster restarts. This parameter will then be reset to its default value (false). To resume data ingestion, simply run risectl meta resume or restart the cluster again. | +| pause\_on\_next\_bootstrap | This parameter is used for debugging and maintenance purposes. Setting it to true will pause all data sources, such as connectors and DMLs, when the cluster restarts. This parameter will then be reset to its default value (false). To resume data ingestion, simply run `risectl meta resume` or restart the cluster again. | | enable\_tracing | Whether to enable distributed tracing. This parameter is used to toggle the opentelemetry tracing during runtime. Its default value is false. | | time\_travel\_retention\_ms | The data retention period for time travel. Defaults to 0, which disables time travel. To enable this feature, set it to a a non-zero value. | @@ -80,8 +80,6 @@ For example, to initialize the setting of `data_directory`: `meta-node --data_directory "hummock_001"` - -**NOTE** - + As RisingWave reads system parameters at different times, there is no guarantee that a parameter value change will take effect immediately. We recommend that you adjust system parameters before running a streaming query after your RisingWave cluster starts. - + diff --git a/performance/performance-faq.mdx b/performance/performance-faq.mdx index cdfbd2d4..d1db3f0b 100644 --- a/performance/performance-faq.mdx +++ b/performance/performance-faq.mdx @@ -1,5 +1,5 @@ --- -title: "Performance-related FAQs" +title: "FAQs" description: This topic addresses common queries related to resource allocation and adjustment for both streaming and batch queries. This will assist you in fine-tuning performance and maximizing efficiency. mode: wide --- diff --git a/processing/indexes.mdx b/processing/indexes.mdx index b19dfc6c..681a0e60 100644 --- a/processing/indexes.mdx +++ b/processing/indexes.mdx @@ -90,8 +90,6 @@ SELECT c_name, c_address FROM customers WHERE c_phone = '123456789'; ``` -**TIP** - You can use the [EXPLAIN](/docs/current/sql-explain/) command to view the execution plan. 
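For instance, prefixing the query above with `EXPLAIN` shows whether the batch plan picks up the index (the exact plan output varies by RisingWave version):

```sql
EXPLAIN SELECT c_name, c_address FROM customers WHERE c_phone = '123456789';
```

If the index is used, the plan should reference a scan on the index built over `c_phone` rather than a full scan of `customers`.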
diff --git a/processing/maintain-wide-table-with-table-sinks.mdx b/processing/maintain-wide-table-with-table-sinks.mdx index 9d403b42..dba01d0e 100644 --- a/processing/maintain-wide-table-with-table-sinks.mdx +++ b/processing/maintain-wide-table-with-table-sinks.mdx @@ -34,11 +34,10 @@ CREATE SINK sink3 INTO wide_d (v3,k) AS ); ``` - -**NOTE** + Keep in mind that the `ON CONFLICT` clause does not affect the update or delete events, the sinks should be forced to be append-only. Otherwise, the delete or update events from any sink will delete the regarding row. - + ## Enrich data with foreign keys in Star/Snowflake schema model diff --git a/processing/sql/joins.mdx b/processing/sql/joins.mdx index 29a65620..61513613 100644 --- a/processing/sql/joins.mdx +++ b/processing/sql/joins.mdx @@ -60,6 +60,108 @@ A full outer join (or simply, full join) returns all rows when there is a match NATURAL FULL [ OUTER ] JOIN ; ``` +## ASOF joins + + +**PUBLIC PREVIEW** + +This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). + + +An ASOF join returns the nearest record in a reference table based on the event time or any ordered properties. + +RisingWave supports these ASOF join types: + +- Inner ASOF join matches records only when both tables have corresponding data. Here's the syntax of an inner ASOF join: + + ```sql + SELECT A.field1 AS A_field1 + FROM TableA ASOF JOIN TableB + ON A.field1 = B.field1 AND A.field2 <= B.field2; + ``` + +- Outer ASOF join includes all records from the left table, even if there is no match in the right table. When there is no match in the right table, the columns from the right table will have NULL values. Here's the syntax of an outer ASOF join: + ```sql + SELECT A.field1 AS A_field1 + FROM TableA ASOF LEFT JOIN TableB + ON A.field1 = B.field1 AND A.field2 <= B.field2; + ``` +In both types of ASOF joins, the join condition must include at least one equality condition (`=`) and one inequality condition (`>=`, `>`, `<=`, or `<`). The inequality condition applies to all data types that support inequality comparison while a time-related type is commonly used. +ASOF join is currently supported for streaming operations only. +For example, suppose you have two tables: + +- `stock_prices`: Contains stock price data at certain timestamps. + | stock_name | stock_time | price | + |:-----------|:----------------------|:------| + | TSLA | 2024-09-24 09:30:00 | 250 | + | TSLA | 2024-09-24 10:30:00 | 252 | + | TSLA | 2024-09-24 11:30:00 | 255 | + | AMZN | 2024-09-24 09:30:00 | 3300 | + | AMZN | 2024-09-24 10:30:00 | 3310 | + | AMZN | 2024-09-24 11:30:00 | 3320 | + | GOOG | 2024-09-24 09:30:00 | 1400 | + | GOOG | 2024-09-24 10:30:00 | 1410 | + | GOOG | 2024-09-24 11:30:00 | 1420 | + +- `market_data`: Contains market sentiment data at different timestamps. 
+ | stock_name | market_time | sentiment | + |:-----------|:----------------------|:----------| + | TSLA | 2024-09-24 09:00:00 | 0.7 | + | TSLA | 2024-09-24 10:00:00 | 0.8 | + | TSLA | 2024-09-24 11:00:00 | 0.9 | + | AMZN | 2024-09-24 09:00:00 | 0.6 | + | AMZN | 2024-09-24 10:00:00 | 0.65 | + | AMZN | 2024-09-24 11:00:00 | 0.7 | + | NVDA | 2024-09-24 09:00:00 | 0.55 | + | NVDA | 2024-09-24 10:00:00 | 0.6 | + | NVDA | 2024-09-24 11:00:00 | 0.65 | + +We want to join the stock prices with the nearest preceding market sentiment for each stock price based on time. We can use an ASOF JOIN to find the latest matching record in `market_data` where the `market_time` is less than or equal to the `stock_time`: + +```sql +SELECT sp.stock_name, sp.stock_time, sp.price, md.sentiment +FROM stock_prices sp +ASOF JOIN market_data md +ON sp.stock_name = md.stock_name +AND md.market_time <= sp.stock_time; +``` + +Output: + +| stock_name | stock_time | price | sentiment | +|:-----------|:----------------------|:------|:----------| +| TSLA | 2024-09-24 09:30:00 | 250 | 0.7 | +| TSLA | 2024-09-24 10:30:00 | 252 | 0.8 | +| TSLA | 2024-09-24 11:30:00 | 255 | 0.9 | +| AMZN | 2024-09-24 09:30:00 | 3300 | 0.6 | +| AMZN | 2024-09-24 10:30:00 | 3310 | 0.65 | +| AMZN | 2024-09-24 11:30:00 | 3320 | 0.7 | + +We can use an ASOF LEFT JOIN to output records in the left table that have no matches in the right table. + +```sql +SELECT sp.stock_name, sp.stock_time, sp.price, md.sentiment +FROM stock_prices sp +ASOF LEFT JOIN market_data md +ON sp.stock_name = md.stock_name +AND md.market_time <= sp.stock_time; +``` +Output: +| stock_name | stock_time | price | sentiment | +| :----------| :---------------------|:------|:----------| +| TSLA | 2024-09-24 09:30:00 | 250 | 0.7 | +| TSLA | 2024-09-24 10:30:00 | 252 | 0.8 | +| TSLA | 2024-09-24 11:30:00 | 255 | 0.9 | +| AMZN | 2024-09-24 09:30:00 | 3300 | 0.6 | +| AMZN | 2024-09-24 10:30:00 | 3310 | 0.65 | +| AMZN | 2024-09-24 11:30:00 | 3320 | 0.7 | +| GOOG | 2024-09-24 09:30:00 | 1400 | NULL | +| GOOG | 2024-09-24 10:30:00 | 1410 | NULL | +| GOOG | 2024-09-24 11:30:00 | 1420 | NULL | + +TSLA and AMZN have matching records in `market_data`, so they show the closest preceding sentiment. +GOOG has no corresponding data in `market_data`, so the sentiment column is NULL. + ## Windows joins In a regular join (that is, a join without time attributes), the join state may grow without restriction. If you only need to get windowed results of two sources, you can segment data in the sources into time windows, and join matching windows from the two sources. To create a window join, the same time window functions must be used, and the window size must be the same. @@ -127,9 +229,10 @@ FROM s1 JOIN s2 ON s1.id = s2.id and s1.ts between s2.ts and s2.ts + INTERVAL '1' MINUTE; ``` -#### Notes + +Interval join‘s state cleaning is triggered only when upstream messages arrive, and it operates at the granularity of each join key. As a result, if no messages are received for a join key, the state may still hold stale data. -* Interval join‘s state cleaning is triggered only when upstream messages arrive, and it operates at the granularity of each join key. As a result, if no messages are received for a join key, the state may still hold stale data. + ## Process-time temporal joins @@ -147,19 +250,22 @@ SELECT ... FROM [AS ] ON ; ``` -#### Notes -* The left table expression is an append-only table or source. -* The right table expression is a table, index or materialized view. 
-* The process-time syntax `FOR SYSTEM_TIME AS OF PROCTIME()` is included in the right table expression. -* The join type is INNER JOIN or LEFT JOIN. -* The Join condition includes the primary key of the right table expression. + +- The left table expression is an append-only table or source. +- The right table expression is a table, index or materialized view. +- The process-time syntax `FOR SYSTEM_TIME AS OF PROCTIME()` is included in the right table expression. +- The join type is INNER JOIN or LEFT JOIN. +- The Join condition includes the primary key of the right table expression. + + + #### Example If you have an append-only stream that includes messages like below: | transaction\_id | product\_id | quantity | sale\_date | process\_time | -| --------------- | ----------- | -------- | ---------- | ------------------- | +| :-------------- | :---------- | :------- | :--------- | :------------------ | | 1 | 101 | 3 | 2023-06-18 | 2023-06-18 10:15:00 | | 2 | 102 | 2 | 2023-06-19 | 2023-06-19 15:30:00 | | 3 | 101 | 1 | 2023-06-20 | 2023-06-20 11:45:00 | @@ -167,7 +273,7 @@ If you have an append-only stream that includes messages like below: And a versioned table `products`: | id | product\_name | price | valid\_from | valid\_to | -| --- | ------------- | ----- | ------------------- | ------------------- | +| :-- | :------------ | :---- | :------------------ | :------------------ | | 101 | Product A | 20 | 2023-06-01 00:00:00 | 2023-06-15 23:59:59 | | 101 | Product A | 25 | 2023-06-16 00:00:00 | 2023-06-19 23:59:59 | | 101 | Product A | 22 | 2023-06-20 00:00:00 | NULL | @@ -185,7 +291,7 @@ ON product_id = id WHERE process_time BETWEEN valid_from AND valid_to; ``` | transaction\_id | product\_id | quantity | sale\_date | product\_name | price | -| --------------- | ----------- | -------- | ---------- | ------------- | ----- | +| :-------------- | :---------- | :------- | :--------- | :------------ | :---- | | 1 | 101 | 3 | 2023-06-18 | Product A | 25 | | 2 | 102 | 2 | 2023-06-19 | Product B | 15 | | 3 | 101 | 1 | 2023-06-20 | Product A | 22 | @@ -213,13 +319,11 @@ UPDATE sales SET quantity = quantity + 1; You will get these results: | transaction\_id | product\_id | quantity | sale\_date | product\_name | price | -| --------------- | ----------- | -------- | ---------- | ------------- | ----- | +| :-------------- | :---------- | :------- | :--------- | :------------ | :---- | | 1 | 101 | 4 | 2023-06-18 | Product A | 25 | | 2 | 102 | 3 | 2023-06-19 | Product B | 15 | | 3 | 101 | 2 | 2023-06-20 | Product A | 22 | - -**NOTE** + Every time you update the left-hand side table, it will look up the latest data from the right-hand side table. - - + diff --git a/processing/sql/temporal-filters.mdx b/processing/sql/temporal-filters.mdx index 6af6836a..797bb1ec 100644 --- a/processing/sql/temporal-filters.mdx +++ b/processing/sql/temporal-filters.mdx @@ -105,14 +105,12 @@ However, due to delays caused by the network or other phases, it is not guarante LEFT JOIN dimension FOR SYSTEM_TIME AS OF PROCTIME() ON id1 = id2; ``` - -**NOTE** - + Currently, RisingWave's optimizer cannot ensure the temporal filter's predicate pushdown. Please add the temporal filter in the `FROM` clause as a sub-query, like the SQL example, instead of writing the temporal filter in the query's top `WHERE` clause. - + -**INFO** + The `PROCTIME` in the example can be replaced with the event time in the records. 
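As a minimal sketch (with hypothetical stream and column names), the temporal filter can be keyed on the record's event time instead of `PROCTIME()`, while still being placed in a `FROM` sub-query:

```sql
-- Hypothetical stream "orders" with an event-time column "order_time".
-- The temporal filter keeps roughly the last hour of rows and sits in the
-- FROM sub-query rather than in the query's top-level WHERE clause.
SELECT id1, id2
FROM (
    SELECT * FROM orders
    WHERE order_time + INTERVAL '1 hour' > NOW()
) AS recent_orders
LEFT JOIN dimension FOR SYSTEM_TIME AS OF PROCTIME() ON id1 = id2;
```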
diff --git a/processing/sql/time-windows.mdx b/processing/sql/time-windows.mdx index e4916d56..9652aebb 100644 --- a/processing/sql/time-windows.mdx +++ b/processing/sql/time-windows.mdx @@ -116,11 +116,9 @@ The result looks like the table below. Note that the number of rows in the resul In RisingWave, session windows are supported by a special type of window function frame: `SESSION` frame. You can refer to [Window function calls](/docs/current/query-syntax-value-exp/#window-function-calls) for detailed syntax. - -**NOTE** - + Currently, `SESSION` frame is only supported in batch mode and emit-on-window-close streaming mode. - + When using session windows, you can achieve the effect that is very similar to `tumble()` and `hop()` time window functions, that is, to assign each row a time window by augmenting it with `window_start` and `window_end`. Here is an example: diff --git a/processing/sql/top-n-by-group.mdx b/processing/sql/top-n-by-group.mdx index 98576e46..5d168b50 100644 --- a/processing/sql/top-n-by-group.mdx +++ b/processing/sql/top-n-by-group.mdx @@ -23,20 +23,18 @@ function_name() OVER ([PARTITION BY col1[, col2...]] ORDER BY col1 [ ASC | DESC ][, col2 [ ASC | DESC ]...]) ``` - -**NOTE** - + `rank` cannot be included in `column_list`. - + -**INFO** + You must follow the pattern exactly to construct a valid Top-N query. | Parameter | Description | -| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _function\_name_ | RisingWave supports two window functions in top-N queries: row\_number(): Returns the sequential row ordinal (1-based) of each row for each ordered partition.rank(): Returns the ordinal (1-based) rank of each row within the ordered partition. All peer rows receive the same rank value. The next row or set of peer rows receives a rank value which increments by the number of peers with the previous rank value. | | PARTITION BY clause | Specifies the partition columns. Each partition will have a Top-N result. | | ORDER BY clause | Specifies how the rows are ordered. | diff --git a/processing/time-travel-queries.mdx b/processing/time-travel-queries.mdx index 9cfabd24..aff430fe 100644 --- a/processing/time-travel-queries.mdx +++ b/processing/time-travel-queries.mdx @@ -23,11 +23,9 @@ The system parameter `time_travel_retention_ms` controls time travel functionali For example, you can set `time_travel_retention_ms` to `86400000` (1 day). Then historical data older than this period will be deleted and no longer accessible. - -**NOTE** - + Enabling time travel will introduce additional overhead to both the meta store and the object store. - + ## Syntax @@ -37,11 +35,9 @@ Specify `FOR SYSTEM_TIME AS OF` separately for each table accessing historical d * Datetime. 
For example, `SELECT * FROM t_foo FOR SYSTEM_TIME AS OF '2000-02-29T12:13:14-08:30';`. * NOW() \[ - Interval \]. For example, `SELECT * FROM t_foo FOR SYSTEM_TIME AS OF NOW() - '10' SECOND;`. - -note - + If you specify a point in time that is outside the time travel period, the query will return an error, like `time travel: version not found for epoch`. - + ## Storage space reclamation diff --git a/processing/watermarks.mdx b/processing/watermarks.mdx index db3a4a06..52b2e4d7 100644 --- a/processing/watermarks.mdx +++ b/processing/watermarks.mdx @@ -6,7 +6,7 @@ description: "In stream processing, watermarks are integral when using event tim Let us go over an example on how watermarks are generated and utilized during window computations. Say the following events and their corresponding event-time timestamps arrive. | Event | Timestamp | -| ------- | ----------- | +| :------ | :---------- | | Event F | 11:59:30 AM | | Event G | 12:00:00 PM | | Event H | 12:00:10 PM | @@ -15,7 +15,7 @@ Let us go over an example on how watermarks are generated and utilized during wi Consider a scenario where the watermark is set as the maximum event time observed so far minus 10 seconds. So the following watermarks will be generated. | Event | Timestamp | Watermark | -| ------- | ----------- | ----------- | +| :------ | :---------- | :---------- | | Event F | 11:59:30 AM | 11:59:20 AM | | Event G | 12:00:00 PM | 11:59:50 AM | | Event H | 12:00:11 PM | 12:00:01 PM | @@ -49,11 +49,9 @@ WATERMARK FOR time_col as time_col - INTERVAL 'string' time_unit ``` Supported `time_unit` values include: second, minute, hour, day, month, and year. For more details, see the `interval` data type under [Overview of data types](/docs/current/sql-data-types/). - -**NOTE** - + Currently, RisingWave only supports using one of the columns from the table as the watermark column. To use nested fields (e.g., fields in `STRUCT`), or perform expression evaluation on the input rows (e.g., casting data types), please refer to [generated columns](/docs/current/query-syntax-generated-columns/). - + ### Example diff --git a/python/python.mdx b/python-sdk/intro.mdx similarity index 60% rename from python/python.mdx rename to python-sdk/intro.mdx index 03eaba1d..34fbd798 100644 --- a/python/python.mdx +++ b/python-sdk/intro.mdx @@ -1,11 +1,10 @@ --- -title: Python -description: Use RisingWave in your Python application +title: Python SDK +description: Describes how to use the purpose-built Python SDK by RisingWave to build event-driven applications. --- -RisingWave provides a Python SDK [`risingwave-py`](https://pypi.org/project/risingwave-py/) (currently under public preview) to help user develops event-driven applications. - -As RisingWave is wire-compatible with PostgreSQL, you can also use third-party PostgreSQL drivers like `psycopg2` and `sqlalchemy` to interact with RisingWave from your Python applications. +RisingWave provides a Python SDK [`risingwave-py`](https://pypi.org/project/risingwave-py/) (currently in public preview) to help users develop event-driven applications. +This SDK provides a simple way to perform ad-hoc queries, subscribe to changes, and define event handlers for tables and materialized views, making it easier to integrate real-time data into applications. ## Use `risingwave-py` to connect to RisingWave @@ -192,143 +191,4 @@ threading.Thread( # rw.insert(table_name="test", data=df) ``` -For more details, please refer to the `risingwave-py` [GitHub repo](https://github.com/risingwavelabs/risingwave-py). 
- -## Use `psycopg2` to connect to RisingWave - -In this section, we use the [`psycopg2`](https://pypi.org/project/psycopg2/) driver to connect to RisingWave. - -### Run RisingWave - -To learn about how to run RisingWave, see [Run RisingWave](../get-started/quickstart). - - -### Install the `psgcopg2` driver - -For information about how to install `psycopg` and the difference between `psycopg` and `psycopg-binary`, see the [official psycopg documentation](https://www.psycopg.org/docs/install.html). - - -### Connect to RisingWave - -To connect to RisingWave via `psycopg2`: - -```python -import psycopg2 - -conn = psycopg2.connect(host="127.0.0.1", port=4566, user="root", dbname="dev") -``` - -### Create a source - -The code below creates a source `walk` with the `datagen` connector. The `datagen` connector is used to generate mock data. The `walk` source consists of two columns, `distance` and `duration`, which respectively represent the distance and the duration of a walk. The source is a simplified version of the data that is tracked by smart watches. - -```python -import psycopg2 - -conn = psycopg2.connect(host="localhost", port=4566, user="root", dbname="dev") # Connect to RisingWave. -conn.autocommit = True # Set queries to be automatically committed. - -with conn.cursor() as cur: - cur.execute(""" -CREATE TABLE walk(distance INT, duration INT) -WITH ( - connector = 'datagen', - fields.distance.kind = 'sequence', - fields.distance.start = '1', - fields.distance.end = '60', - fields.duration.kind = 'sequence', - fields.duration.start = '1', - fields.duration.end = '30', - datagen.rows.per.second='15', - datagen.split.num = '1' -) FORMAT PLAIN ENCODE JSON""") # Execute the query. - -conn.close() # Close the connection. -``` - -All the code examples in this guide include a section for connecting to RisingWave. If you perform multiple actions within one connection session, you do not need to repeat this section. - - - -### Create a materialized view - -The code in this section creates a materialized view `counter` to capture the latest total distance and duration. - -```python -import psycopg2 - -conn = psycopg2.connect(host="localhost", port=4566, user="root", dbname="dev") -conn.autocommit = True - -with conn.cursor() as cur: - cur.execute("""CREATE MATERIALIZED VIEW counter - AS SELECT - SUM(distance) as total_distance, - SUM(duration) as total_duration - FROM walk;""") - -conn.close() -``` - -### Query a materialized view - -The code in this section queries the materialized view `counter` to get real-time data. - -```python -import psycopg2 - -conn = psycopg2.connect(host="localhost", port=4566, user="root", dbname="dev") -conn.autocommit = True - -with conn.cursor() as cur: - cur.execute("SELECT * FROM counter;") - print(cur.fetchall()) -conn.close() -``` - -## Use `sqlalchemy` to connect to RisingWave - -In this section, we use the [SQLAlchemy](https://www.sqlalchemy.org) driver to connect to RisingWave. - -### Run RisingWave - -To learn about how to run RisingWave, see [Run RisingWave](../get-started/quickstart). - -### Install necessary Python packages - -Ensure you have Python3 installed. - -For more information about `sqlalchemy`, see the [SQLAlchemy](https://www.sqlalchemy.org). Refer to the documentation version that corresponds to the version of SQLAlchemy that you run. - -For information about how to install `psycopg-binary`, see the [official psycopg documentation](https://www.psycopg.org/docs/install.html). 
- -```terminal -pip3 install SQLAlchemy sqlalchemy-risingwave psycopg2-binary -``` - -### Connect to RisingWave - -To connect to RisingWave via `sqlalchemy`: - -```python -DB_URI = 'risingwave+psycopg2://root@risingwave-standalone:4566/dev' - -engine = create_engine(DB_URI) -``` - -Note that RisingWave does not provide direct compatibility with `sqlaclehmy-postgres` so `risingwave+psycopg2` is used as the URI scheme. The rest of the URL follows the same format as the PostgreSQL driver. - -### Create a source - -The code below creates a table `users` using the engine created in the previous section. - -```python -with engine.connect() as conn: - conn.execute("""CREATE TABLE IF NOT EXISTS users ( - id INTEGER PRIMARY KEY, - name VARCHAR, - age INTEGER)""") -``` - -You can create materialized views and query from materialized views using the same format shown above. - +For more details, please refer to the `risingwave-py` [GitHub repo](https://github.com/risingwavelabs/risingwave-py). \ No newline at end of file diff --git a/sql/commands/sql-alter-connection.mdx b/sql/commands/sql-alter-connection.mdx index 5cd9285d..e2ea0077 100644 --- a/sql/commands/sql-alter-connection.mdx +++ b/sql/commands/sql-alter-connection.mdx @@ -22,7 +22,7 @@ ALTER CONNECTION connection_name ``` | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------- | | **SET SCHEMA** | This clause changes the schema of the connection. To change a connection's schema, you must also have CREATE privilege on the new schema. | | _schema\_name_ | Specify the schema to which you want to change. | diff --git a/sql/commands/sql-alter-database.mdx b/sql/commands/sql-alter-database.mdx index 349bd3c7..975ed039 100644 --- a/sql/commands/sql-alter-database.mdx +++ b/sql/commands/sql-alter-database.mdx @@ -21,7 +21,7 @@ ALTER DATABASE database_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | **OWNER TO** | This clause changes the owner of the database. To alter the owner, you must be able to SET ROLE to the new owning role, and you must have the CREATEDB privilege. Note that superusers have all these privileges automatically. | | _new\_user_ | The new owner you want to assign to the database. 
| @@ -38,7 +38,7 @@ ALTER DATABASE database_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | **RENAME TO** | This clause changes the name of the database. Only the database owner or a superuser can rename a database; non-superuser owners must also have the CREATEDB privilege. The current database cannot be renamed. (Connect to a different database if you need to do that.) | | _new\_name_ | The new name of the database. | diff --git a/sql/commands/sql-alter-function.mdx b/sql/commands/sql-alter-function.mdx index 1605279a..42d1bc13 100644 --- a/sql/commands/sql-alter-function.mdx +++ b/sql/commands/sql-alter-function.mdx @@ -22,7 +22,7 @@ ALTER FUNCTION function( argument_type [, ...] ) ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------ | | **SET SCHEMA** | This clause changes the schema of the function. To change a function's schema, you must also have CREATE privilege on the new schema. | | _schema\_name_ | Specify the schema to which you want to change. | diff --git a/sql/commands/sql-alter-index.mdx b/sql/commands/sql-alter-index.mdx index 3b731cff..8ca8aa1e 100644 --- a/sql/commands/sql-alter-index.mdx +++ b/sql/commands/sql-alter-index.mdx @@ -22,7 +22,7 @@ ALTER INDEX index_name ``` | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **RENAME TO** | This clause changes the name of the index. If the index is associated with a table constraint (either UNIQUE, PRIMARY KEY, or EXCLUDE), the constraint is renamed as well. There is no effect on the stored data. | | _new\_name_ | The new name of the index. 
| diff --git a/sql/commands/sql-alter-materialized-view.mdx b/sql/commands/sql-alter-materialized-view.mdx index 894682cb..5426af31 100644 --- a/sql/commands/sql-alter-materialized-view.mdx +++ b/sql/commands/sql-alter-materialized-view.mdx @@ -28,14 +28,13 @@ ALTER MATERIALIZED VIEW materialized_view_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------ | :----------------------------------------------------------------------------------------------------------------------------------- | | **OWNER TO** | This clause changes the owner of the materialized view. Note that this will cascadingly change all related internal objects as well. | | _new\_user_ | The new owner you want to assign to the materialized view. | ``` -- Change the owner of the materialized view named "materialized_view1" to user "user1" ALTER MATERIALIZED VIEW materialized_view1 OWNER TO user1; - ``` ### `SET SCHEMA` @@ -46,7 +45,7 @@ ALTER MATERIALIZED VIEW materialized_view_name ``` | Parameter or clause | Description | -| ------------------- | -------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------- | | **SET SCHEMA** | This clause moves the materialized view to a different schema. | | _schema\_name_ | The name of the schema to which the materialized view will be moved. | @@ -63,7 +62,7 @@ SET PARALLELISM = parallelism_number; ``` | Parameter or clause | Description | -| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **SET PARALLELISM** | This clause controls the degree of [parallelism](/docs/current/key-concepts/#parallelism) for the targeted [streaming job](/docs/current/key-concepts/#streaming-jobs). | | _parallelism\_number_ | This parameter can be ADAPTIVE or a fixed number, like 1, 2, 3, etc. Altering the parameter to ADAPTIVE will expand the streaming job's degree of parallelism to encompass all available units, whereas setting it to a fixed number will lock the job's parallelism at that specific figure. Setting it to 0 is equivalent to ADAPTIVE. | @@ -80,7 +79,7 @@ ALTER MATERIALIZED VIEW materialized_view_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------ | +| :------------------ | :----------------------------------------------------- | | **RENAME TO** | This clause changes the name of the materialized view. | | _new\_name_ | The new name of the materialized view. | @@ -96,7 +95,8 @@ ALTER MATERIALIZED VIEW mv_name SET BACKFILL_RATE_LIMIT { TO | = } { default | rate_limit_number }; ``` -Use this statement to modify the backfill rate limit of a materialized view being created. 
For the specific value of `BACKFILL_RATE_LIMIT`, refer to [How to view runtime parameters](/docs/current/view-configure-runtime-parameters/#how-to-view-runtime-parameters). +This statement controls the rate limit of a newly created materialized view's backfilling process from upstream materialized views and sources. +For the specific value of `BACKFILL_RATE_LIMIT`, refer to [How to view runtime parameters](/operate/view-configure-runtime-parameters/#how-to-view-runtime-parameters). ```sql Examples -- Pause the backfill @@ -107,5 +107,8 @@ ALTER MATERIALIZED VIEW mv1 SET BACKFILL_RATE_LIMIT=1; -- Disable the backfill ALTER MATERIALIZED VIEW mv1 SET BACKFILL_RATE_LIMIT=DEFAULT; - ``` + + +To modify the rate limit of the sources used in the materialized view, please refer to [SET SOURCE_RATE_LIMIT](/sql/commands/sql-alter-source#set-source-rate-limit). + diff --git a/sql/commands/sql-alter-schema.mdx b/sql/commands/sql-alter-schema.mdx index 84577513..410568bb 100644 --- a/sql/commands/sql-alter-schema.mdx +++ b/sql/commands/sql-alter-schema.mdx @@ -22,7 +22,7 @@ ALTER SCHEMA current_schema_name ``` | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **OWNER TO** | This clause changes the owner of the schema. To alter the owner, you must be able to SET ROLE to the new owning role, and you must have the CREATEDB privilege. Note that superusers have all these privileges automatically. | | _new\_user_ | The new owner you want to assign to the schema. | @@ -39,7 +39,7 @@ ALTER SCHEMA current_schema_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | **RENAME TO** | This clause changes the name of the schema. To rename a schema you must also have the CREATE privilege for the database. Note that superusers have the privilege automatically. | | _new\_name_ | The new name of the schema. | diff --git a/sql/commands/sql-alter-sink.mdx b/sql/commands/sql-alter-sink.mdx index c5fd41d7..ba829f01 100644 --- a/sql/commands/sql-alter-sink.mdx +++ b/sql/commands/sql-alter-sink.mdx @@ -28,7 +28,7 @@ ALTER SINK sink_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------------------------------------------------ | | **OWNER TO** | This clause changes the owner of the sink. This will cascadingly change all related internal-objects as well. | | _new\_user_ | The new owner you want to assign to the sink. 
| @@ -45,7 +45,7 @@ ALTER SINK sink_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------- | +| :------------------ | :------------------------------------------------------ | | **SET SCHEMA** | This clause moves the sink to a different schema. | | _schema\_name_ | The name of the schema to which the sink will be moved. | @@ -62,7 +62,7 @@ SET PARALLELISM = parallelism_number; ``` | Parameter or clause | Description | -| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **SET PARALLELISM** | This clause controls the degree of [parallelism](/docs/current/key-concepts/#parallelism) for the targeted [streaming job](/docs/current/key-concepts/#streaming-jobs). | | _parallelism\_number_ | This parameter can be ADAPTIVE or a fixed number, like 1, 2, 3, etc. Altering the parameter to ADAPTIVE will expand the streaming job's degree of parallelism to encompass all available units, whereas setting it to a fixed number will lock the job's parallelism at that specific figure. Setting it to 0 is equivalent to ADAPTIVE. | @@ -79,7 +79,7 @@ ALTER SINK sink_name ``` | Parameter or clause | Description | -| ------------------- | ----------------------------------------- | +| :------------------ | :---------------------------------------- | | **RENAME TO** | This clause changes the name of the sink. | | _new\_name_ | The new name of the sink. | diff --git a/sql/commands/sql-alter-source.mdx b/sql/commands/sql-alter-source.mdx index 77905251..3a479ef9 100644 --- a/sql/commands/sql-alter-source.mdx +++ b/sql/commands/sql-alter-source.mdx @@ -22,7 +22,7 @@ ALTER SOURCE source_name ``` | Parameter or clause | Description | -| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **ADD COLUMN** | This clause adds a column to the specified source. | | _col\_name_ | The name of the new column you want to add to the source. | | _data\_type_ | The data type of the newly added column. With the struct data type, you can create a nested table. Elements in a nested table need to be enclosed with angle brackets (\<>). | @@ -33,13 +33,11 @@ ALTER SOURCE src1 ADD COLUMN v3 int; ``` - -**NOTE** - + * To alter columns in a source created with a schema registry, see [FORMAT and ENCODE options](/docs/current/sql-alter-source/#format-and-encode-options). * You cannot add a primary key column to a source or table in RisingWave. To modify the primary key of a source or table, you need to recreate the table. * You cannot remove a column from a source in RisingWave. 
If you intend to remove a column from a source, you'll need to drop the source and create the source again. - + ### `RENAME TO` @@ -49,7 +47,7 @@ ALTER SOURCE source_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------- | +| :------------------ | :------------------------------------------ | | **RENAME TO** | This clause changes the name of the source. | | _new\_source\_name_ | The new name of the source. | @@ -67,7 +65,7 @@ ALTER SOURCE current_source_name ``` | Parameter or clause | Description | -| ------------------- | ----------------------------------------------- | +| :------------------ | :---------------------------------------------- | | **OWNER TO** | This clause changes the owner of the source. | | _new\_user_ | The new owner you want to assign to the source. | @@ -84,7 +82,7 @@ ALTER SOURCE current_source_name ``` | Parameter or clause | Description | -| ------------------- | --------------------------------------------------------- | +| :------------------ | :-------------------------------------------------------- | | **SET SCHEMA** | This clause moves the source to a different schema. | | _schema\_name_ | The name of the schema to which the source will be moved. | @@ -127,13 +125,11 @@ ALTER SOURCE src_user FORMAT PLAIN ENCODE PROTOBUF( ); ``` - -**NOTE** - + Currently, it is not supported to modify the `data_format` and `data_encode`. Furthermore, when refreshing the schema registry of a source, it is not allowed to drop columns or change types. Another way of refreshing the schema is using the [REFRESH SCHEMA clause](#refresh-schema). - + ### `REFRESH SCHEMA` @@ -173,7 +169,15 @@ ALTER SOURCE source_name SET SOURCE_RATE_LIMIT { TO | = } { default | rate_limit_number }; ``` -Use this statement to modify the rate limit of a source. For the specific value of `SOURCE_RATE_LIMIT`, refer to [How to view runtime parameters](/docs/current/view-configure-runtime-parameters/#how-to-view-runtime-parameters). +Use this statement to modify the rate limit of a source. For the specific value of `SOURCE_RATE_LIMIT`, refer to [How to view runtime parameters](/operate/view-configure-runtime-parameters/#how-to-view-runtime-parameters). + + +For a newly created materialized view on a source with historical data (for example, a Kafka source), the materialized view will backfill from +the source. The backfilling process is not affected by the `SOURCE_RATE_LIMIT` +of the source. + +To modify the rate limit of the backfilling process, please refer to [SET BACKFILL_RATE_LIMIT](/sql/commands/sql-alter-materialized-view#set-backfill-rate-limit).
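To make the distinction between the two limits concrete, here is a minimal sketch. The object names (`kafka_src`, `mv_orders`) are hypothetical placeholders; the statements themselves follow the syntax documented on this page and in `ALTER MATERIALIZED VIEW`.

```sql
-- Hypothetical object names (kafka_src, mv_orders) for illustration only.

-- Throttle ongoing ingestion from the external system; this applies to the
-- running source and does not slow down materialized views that are backfilling.
ALTER SOURCE kafka_src SET SOURCE_RATE_LIMIT = 500;

-- Throttle a materialized view that is still catching up on historical data;
-- that stage is governed by BACKFILL_RATE_LIMIT instead.
ALTER MATERIALIZED VIEW mv_orders SET BACKFILL_RATE_LIMIT = 200;

-- Remove both limits.
ALTER SOURCE kafka_src SET SOURCE_RATE_LIMIT = DEFAULT;
ALTER MATERIALIZED VIEW mv_orders SET BACKFILL_RATE_LIMIT = DEFAULT;
```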
+ ```sql Example -- Alter the rate limit of a source to default diff --git a/sql/commands/sql-alter-table.mdx b/sql/commands/sql-alter-table.mdx index ed5198b6..9a9aed29 100644 --- a/sql/commands/sql-alter-table.mdx +++ b/sql/commands/sql-alter-table.mdx @@ -22,7 +22,7 @@ ALTER TABLE table_name ``` | Parameter or clause | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | **ADD \[ COLUMN \]** | This clause adds a new column to the table. COLUMN is optional. | | _column\_name_ | Specify the name of the column you want to add. | | _data\_type_ | The data type of the new column. | @@ -34,12 +34,10 @@ ALTER TABLE table_name ALTER TABLE employees ADD age int; ``` - -**NOTE** - + * If your table is defined with a schema registry, its columns can not be altered. * Columns added by this command cannot be used by any existing materialized views or indexes. You must create new materialized views or indexes to reference it. - + ### `DROP COLUMN` @@ -49,7 +47,7 @@ ALTER TABLE table_name ``` | Parameter or clause | Description | -| --------------------- | ------------------------------------------------------------------------------------------ | +| :-------------------- | :----------------------------------------------------------------------------------------- | | **DROP \[ COLUMN \]** | This clause drops an existing column from a table. COLUMN is optional. | | **IF EXISTS** | Do not return an error if the specified column does not exist. A notice is issued instead. | | _column\_name_ | Specify the column you want to remove. | @@ -59,13 +57,11 @@ ALTER TABLE table_name ALTER TABLE employees DROP fax; ``` - -**NOTE** - + * If your table is defined with a schema registry, you can only change the table schema by `ALTER TABLE t REFRESH SCHEMA`. One exception is you can drop the generated columns even if the schema is defined with a schema registry. Note that dropping these generated columns will trigger a schema refresh. * You cannot drop columns referenced by materialized views or indexes. * To drop a column referenced by a generated column, you must first drop the generated column. 
- + ### `OWNER TO` @@ -75,7 +71,7 @@ ALTER TABLE table_name ``` | Parameter or clause | Description | -| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **OWNER TO** | This clause changes the owner of the table to the specified user. It will cascadingly change all related internal objects as well, and the associated indexes will be changed too. | | _new\_user_ | Specify the user you want to assign to the table. | @@ -94,7 +90,7 @@ ALTER TABLE table_name ``` | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------- | | **SET SCHEMA** | This clause moves the table into another schema. Associated indexes, constraints, and sequences owned by table columns are moved as well. | | _schema\_name_ | Specify the schema to which the table will be moved. | @@ -111,7 +107,7 @@ SET PARALLELISM = parallelism_number; ``` | Parameter or clause | Description | -| --------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **SET PARALLELISM** | This clause controls the degree of [parallelism](/docs/current/key-concepts/#parallelism) for the targeted [streaming job](/docs/current/key-concepts/#streaming-jobs). | | _parallelism\_number_ | This parameter can be ADAPTIVE or a fixed number, like 1, 2, 3, etc. Altering the parameter to ADAPTIVE will expand the streaming job's degree of parallelism to encompass all available units, whereas setting it to a fixed number will lock the job's parallelism at that specific figure. Setting it to 0 is equivalent to ADAPTIVE. 
After setting the parallelism, the parallelism status of a table can be observed within the internal [rw\_table\_fragments](/docs/current/view-configure-runtime-parameters/) table or the [rw\_fragments](/docs/current/view-configure-runtime-parameters/) table. | @@ -179,7 +175,7 @@ ALTER TABLE table_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------ | +| :------------------ | :----------------------------------------- | | **RENAME TO** | This clause changes the name of the table. | | _new\_name_ | The new name of the table. | @@ -202,11 +198,9 @@ This command alters the schema registry of a table created with connectors. ALTER TABLE t_user REFRESH SCHEMA; ``` - -**NOTE** - + If a downstream fragment references a column that is either missing or has undergone a type change in the updated schema, the command will be declined. - + ### `SET SOURCE_RATE_LIMIT` @@ -215,7 +209,8 @@ ALTER TABLE table_name SET SOURCE_RATE_LIMIT { TO | = } { default | rate_limit_number }; ``` -Use this statement to modify the rate limit of tables that have a source. For the specific value of `SOURCE_RATE_LIMIT`, refer to [How to view runtime parameters](/docs/current/view-configure-runtime-parameters/#how-to-view-runtime-parameters). +For tables with a connector, this statement controls the rate limit of the associated source. +For the specific value of `SOURCE_RATE_LIMIT`, refer to [How to view runtime parameters](/operate/view-configure-runtime-parameters/#how-to-view-runtime-parameters). ```sql Example -- Create a table with source @@ -238,13 +233,14 @@ ALTER TABLE kafka_source SET source_rate_limit TO default; ALTER TABLE kafka_source SET source_rate_limit TO 1000; ``` -### `SET BACKFILL_RATE_LIMIT`[] +### `SET BACKFILL_RATE_LIMIT` ```sql ALTER TABLE table_name SET BACKFILL_RATE_LIMIT { TO | = } { default | rate_limit_number }; ``` -Use this statement to modify the backfill rate limit of a CDC table being created from a CDC source. For the specific value of `BACKFILL_RATE_LIMIT`, refer to [How to view runtime parameters](/docs/current/view-configure-runtime-parameters/#how-to-view-runtime-parameters). +For a CDC table created from a CDC source, this statement controls the rate limit of backfilling from the CDC database. +For the specific value of `BACKFILL_RATE_LIMIT`, refer to [How to view runtime parameters](/operate/view-configure-runtime-parameters/#how-to-view-runtime-parameters). ```sql Examples -- Pause the backfill diff --git a/sql/commands/sql-alter-user.mdx index 3f55741c..ace8cdf5 100644 --- a/sql/commands/sql-alter-user.mdx +++ b/sql/commands/sql-alter-user.mdx @@ -27,18 +27,16 @@ ALTER USER user_name WITH oauth ( ## Parameters | Parameter or clause | Description | -| -------------------- | ----------------------------------------------------------------------------------------------------------------------- | +| :------------------- | :---------------------------------------------------------------------------------------------------------------------- | | _user\_name_ | The name of the user to be modified. | | _new\_user\_name_ | The new name of the user. | | _system\_permission_ | See [the options for system permissions of the CREATE USER command](/docs/current/sql-create-user/#system-permissions). | When altering the user authentication method, the `jwks_url` and `issuer` parameters are mandatory.
On the other hand, `other_params_should_match` is an optional parameter that will be validated against `jwt.claims`. Ensure that all keys in the options are in **lowercase**. - -**NOTE** - + `kid` and `alg` are required in the header of JWT, and `kid` is also required in the JWKs returned by the JWKS server. All parameters set in user creation (except `jwks_url`) will be checked in the claims of JWT. Any mismatch will deny the login process. - + ## Examples diff --git a/sql/commands/sql-alter-view.mdx b/sql/commands/sql-alter-view.mdx index 48d195ee..a3cdb06c 100644 --- a/sql/commands/sql-alter-view.mdx +++ b/sql/commands/sql-alter-view.mdx @@ -22,7 +22,7 @@ ALTER VIEW view_name ``` | Parameter or clause | Description | -| ------------------- | --------------------------------------------- | +| :------------------ | :-------------------------------------------- | | **OWNER TO** | This clause changes the owner of the view. | | _new\_user_ | The new owner you want to assign to the view. | @@ -39,7 +39,7 @@ ALTER VIEW view_name ``` | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------- | +| :------------------ | :------------------------------------------------------ | | **SET SCHEMA** | This clause moves the view to a different schema. | | _schema\_name_ | The name of the schema to which the view will be moved. | @@ -56,7 +56,7 @@ ALTER VIEW view_name ``` | Parameter or clause | Description | -| ------------------- | ----------------------------------------- | +| :------------------ | :---------------------------------------- | | **RENAME TO** | This clause changes the name of the view. | | _new\_name_ | The new name of the view. | diff --git a/sql/commands/sql-cancel-jobs.mdx b/sql/commands/sql-cancel-jobs.mdx index f3e9e011..3932b32d 100644 --- a/sql/commands/sql-cancel-jobs.mdx +++ b/sql/commands/sql-cancel-jobs.mdx @@ -44,7 +44,7 @@ Id diff --git a/sql/commands/sql-comment-on.mdx b/sql/commands/sql-comment-on.mdx index f1a9e365..b21b16b2 100644 --- a/sql/commands/sql-comment-on.mdx +++ b/sql/commands/sql-comment-on.mdx @@ -14,7 +14,7 @@ COMMENT ON . IS ## Parameters | Parameter | Notes | -| ----------------------------- | ---------------------------------------------------------------------------------------------------------- | +| :---------------------------- | :--------------------------------------------------------------------------------------------------------- | | _object\_type_ | Type of the object that you want to add comments to. Allowed values: TABLE, COLUMN. | | _relation\_name.object\_name_ | Name of the object that you want to add comments to. For columns, you also need to specify the table name. | | _comment_ | Comment that you want to add. | diff --git a/sql/commands/sql-create-aggregate.mdx b/sql/commands/sql-create-aggregate.mdx index 762dc0b0..a6188e7e 100644 --- a/sql/commands/sql-create-aggregate.mdx +++ b/sql/commands/sql-create-aggregate.mdx @@ -15,7 +15,7 @@ CREATE AGGREGATE function_name ( argument_type [, ...] ) ### Parameters | Parameter or clause | Description | -| -------------------------- | ----------------------------------------------------------------------------------------------------- | +| :------------------------- | :---------------------------------------------------------------------------------------------------- | | _function\_name_ | The name of the aggregate function that you want to declare in RisingWave. 
| | _argument\_type_ | The data type of the input parameter(s) that the function expects to receive. | | **RETURNS** _return\_type_ | The data type of the return value from the aggregate function. | diff --git a/sql/commands/sql-create-connection.mdx b/sql/commands/sql-create-connection.mdx index 5def6f28..e9193aaf 100644 --- a/sql/commands/sql-create-connection.mdx +++ b/sql/commands/sql-create-connection.mdx @@ -19,18 +19,16 @@ WITH ( All WITH options are required unless stated otherwise. | Parameter or clause | Description | -| ------------------- | -------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------------------------------------------------------------------- | | _connection\_name_ | The name of the connection to be created. | | type | The type of connection. | | provider | The provider of the connection. | | service.name | The service name of the endpoint service. | | tags | Optional. The AWS tags used to check for resource leakage. This parameter should have the format: key1=value1, key2=value2, .... | - -**NOTE** - + You can either tag the VPC endpoints by specifying the `tags` parameter when using the `CREATE CONNECTION` command or by specifying the environment variable `RW_PRIVATELINK_ENDPOINT_DEFAULT_TAGS`. When specifying the tags, follow the format of `key1=value1, key2=value2, ...`. If both are specified, the tags specified in the environment variable will be appended to the ones specified by the `tags` parameter. - + ## Example diff --git a/sql/commands/sql-create-database.mdx b/sql/commands/sql-create-database.mdx index 7e26659b..dc569f6d 100644 --- a/sql/commands/sql-create-database.mdx +++ b/sql/commands/sql-create-database.mdx @@ -6,25 +6,26 @@ description: "Use the `CREATE DATABASE` command to create a new database." ## Syntax ```sql -CREATE DATABASE [ IF NOT EXISTS ] database_name; +CREATE DATABASE [ IF NOT EXISTS ] database_name + [ WITH ] [ OWNER [=] user_name ]; ``` ## Parameters | Parameter or clause | Description | -| ------------------------ | --------------------------------------------------------------------------------------------- | +| :----------------------- | :-------------------------------------------------------------------------------------------- | | _database_name_ | The name of the database to be created. | | **IF NOT EXISTS** clause | Creates a database if the database name has not already been used. Otherwise throws an error. | +| **OWNER [=] user_name** clause | Specifies which user owns the database to be created. | ## Example ```sql -CREATE DATABASE IF NOT EXISTS travel; +CREATE DATABASE IF NOT EXISTS travel + WITH OWNER = travel_admin; ``` - -**NOTE** - + Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/). - + diff --git a/sql/commands/sql-create-function.mdx b/sql/commands/sql-create-function.mdx index 0af52e8d..97f86a2a 100644 --- a/sql/commands/sql-create-function.mdx +++ b/sql/commands/sql-create-function.mdx @@ -24,7 +24,7 @@ CREATE FUNCTION function_name ( argument_type [, ...] 
) ### Parameters | Parameter or clause | Description | -| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _function\_name_ | The name of the UDF that you want to declare in RisingWave. | | _argument\_type_ | The data type of the input parameter(s) that the UDF expects to receive. | | **RETURNS** _return\_type_ | Use this if the function returns a single value (i.e., scalar). It specifies the data type of the return value from the UDF.The struct type, which can contain multiple values, is supported. But the field names must be consistent between the programming language and SQL definitions, or it will be considered a type mismatch. | diff --git a/sql/commands/sql-create-index.mdx b/sql/commands/sql-create-index.mdx index 0d1b93b5..c45d0a15 100644 --- a/sql/commands/sql-create-index.mdx +++ b/sql/commands/sql-create-index.mdx @@ -14,7 +14,7 @@ CREATE INDEX [ IF NOT EXISTS ] index_name ON object_name ( index_column [ ASC | ## Parameters | Parameter or clause | Description | -| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **IF NOT EXISTS** | This clause is used to check if an index with the specified name already exists before creating a new index. If the index already exists, the clause prevents an error from occurring and the index creation operation is skipped. A notice is issued in this case. Note that there is no guarantee that the existing index is anything like the one that would have been created. Index name is required when IF NOT EXISTS is specified. | | _index\_name_ | The name of the index to be created. | | _object\_name_ | The name of the table or materialized view where the index is created. 
| diff --git a/sql/commands/sql-create-mv.mdx b/sql/commands/sql-create-mv.mdx index ee45bfd2..37323b4f 100644 --- a/sql/commands/sql-create-mv.mdx +++ b/sql/commands/sql-create-mv.mdx @@ -10,31 +10,24 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] mv_name AS select_query; ``` -**TIP** - `CREATE MATERIALIZED VIEW` will first **backfill** historical data from the referenced relations, and completion time varies based on the volume of data to be backfilled. To perform the operations in the background, you can execute `SET BACKGROUND_DDL=true;` before running the `CREATE MATERIALIZED VIEW` statement. See details in [SET BACKGROUND\_DDL](/docs/current/sql-set-background-ddl/). - ## Parameters | Parameter or clause | Description | -| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------- | | _mv\_name_ | The name of the materialized view to be created. | | _select\_query_ | A SELECT query that retrieves data for the materialized view. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | - -**NOTE** - + Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/). - - - -**NOTE** + + The `ORDER BY` clause in the `CREATE MATERIALIZED VIEW` statement is allowed but not considered as part of the definition of the materialized view. It's only used in the initial creation of the materialized view and not during refreshes. - + ## Examples diff --git a/sql/commands/sql-create-schema.mdx b/sql/commands/sql-create-schema.mdx index ff6b47e9..74872311 100644 --- a/sql/commands/sql-create-schema.mdx +++ b/sql/commands/sql-create-schema.mdx @@ -14,7 +14,7 @@ CREATE SCHEMA [ IF NOT EXISTS ] AUTHORIZATION user_name; ## Parameters | Parameter or clause | Description | -| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------ | +| :----------------------- | :----------------------------------------------------------------------------------------------------------------------------------- | | _schema\_name_ | The name of the schema to be created. | | **IF NOT EXISTS** clause | Creates a schema if the schema name has not already been used. Otherwise throws an error. | | _database\_name_ | The name of the database for the schema to be created in. If not specified, the schema will be created in the default database dev. | @@ -27,12 +27,10 @@ CREATE SCHEMA [ IF NOT EXISTS ] AUTHORIZATION user_name; CREATE SCHEMA IF NOT EXISTS schema_1; ``` - -**NOTE** - + Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/). 
- + ```sql Examples of AUTHORIZATION clause diff --git a/sql/commands/sql-create-secret.mdx b/sql/commands/sql-create-secret.mdx index f3a08cb3..8849c3d6 100644 --- a/sql/commands/sql-create-secret.mdx +++ b/sql/commands/sql-create-secret.mdx @@ -12,7 +12,7 @@ CREATE SECRET secret_name WITH ( backend = 'meta') AS 'your_secret'; ## Parameters | Parameter or Clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------- | | _secret\_name_ | The name of the secret to be created. This should be a unique identifier within the system. | | _backend_ | Specifies the backend where the secret will be stored. Currently, only the meta backend is supported. | | _your\_secret_ | The secret value that you wish to store securely. | diff --git a/sql/commands/sql-create-sink-into.mdx b/sql/commands/sql-create-sink-into.mdx index 59728f8f..a8f4a085 100644 --- a/sql/commands/sql-create-sink-into.mdx +++ b/sql/commands/sql-create-sink-into.mdx @@ -13,21 +13,17 @@ CREATE SINK [ IF NOT EXISTS ] sink_name INTO table_name [ ( col_name [ , ... ] ) ## Parameters | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | sink\_name | The name of the sink. If a schema name is given (for example, CREATE SINK\.\ ...), then the sink is created in the specified schema. Otherwise it is created in the current schema. | | col\_name | The corresponding table columns in the sink result. For those columns not listed, it will be inserted as the default value defined in the table. | - -**NOTE** - + A table without a primary key can only accept the append-only sink. - - - -**NOTE** + + Currently, if there are sinks in the table, the table cannot be altered to add or drop columns. 
- + ## Examples diff --git a/sql/commands/sql-create-sink.mdx b/sql/commands/sql-create-sink.mdx index eaf27e6b..97aef14d 100644 --- a/sql/commands/sql-create-sink.mdx +++ b/sql/commands/sql-create-sink.mdx @@ -23,7 +23,7 @@ WITH ( ## Parameters | Parameter | Description | -| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _sink\_name_ | The name of the sink. | | _col\_name_ | The name of the column. | | _sink\_from_ | Specify the direct data source for output. It can be a materialized view or a table. | @@ -32,11 +32,9 @@ WITH ( | **WITH** clause | Specify the connector settings here if trying to store all the sink data. See [Supported sinks](#supported-sinks) for the full list of supported sink as well as links to specific connector pages detailing the syntax for each sink. | | **FORMAT** and **ENCODE** options | Optional. Specify the data format and the encoding format of the sink data. It is only used for Kafka, Kinesis, Pulsar, and Redis sinks. | - -**NOTE** - + Please distinguish between the parameters set in the FORMAT and ENCODE options and those set in the WITH clause. Ensure that you place them correctly and avoid any misuse. - + ## Supported sinks @@ -95,10 +93,8 @@ Click a sink name to see the SQL syntax, options, and sample statement of sinkin - -**NOTE** - + Timestamptz values are stored in UTC. When sinking downstream, the representation of timestamptz is configurable. By default, it is in the format `2023-11-11T18:30:09.453000Z`. - + diff --git a/sql/commands/sql-create-source.mdx b/sql/commands/sql-create-source.mdx index 723efe3c..c0954bda 100644 --- a/sql/commands/sql-create-source.mdx +++ b/sql/commands/sql-create-source.mdx @@ -53,16 +53,14 @@ FORMAT upsert ENCODE AVRO ( ); ``` - -**NOTE** - + The generated column is created in RisingWave and will not be accessed through the external connector. Therefore, if the external upstream system has a schema, it does not need to include the generated column within the table's schema in the external system. 
- + ## Parameter | Parameter | Description | -| --------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _source\_name_ | The name of the source. If a schema name is given (for example, CREATE SOURCE \.\ ...), then the table is created in the specified schema. Otherwise it is created in the current schema. | | _col\_name_ | The name of a column. | | _data\_type_ | The data type of a column. With the struct data type, you can create a nested table. Elements in a nested table need to be enclosed with angle brackets (\<>). | @@ -72,12 +70,10 @@ The generated column is created in RisingWave and will not be accessed through t | **WITH** clause | Specify the connector settings here if trying to store all the source data. See [Supported sources](/docs/current/supported-sources-and-formats/#supported-sources) for the full list of supported source as well as links to specific connector pages detailing the syntax for each source. | | **FORMAT** and **ENCODE** options | Specify the data format and the encoding format of the source data. To learn about the supported data formats, see [Supported formats](/docs/current/supported-sources-and-formats/#supported-formats). | - -**NOTE** - + Please distinguish between the parameters set in the FORMAT and ENCODE options and those set in the WITH clause. Ensure that you place them correctly and avoid any misuse. - + ## Watermarks RisingWave supports generating watermarks when creating a source. Watermarks are like markers or signals that track the progress of event time, allowing you to process events within their corresponding time windows. The [WATERMARK](/docs/current/watermarks/) clause should be used within the `schema_definition`. For more information on how to create a watermark, see [Watermarks](/docs/current/watermarks/). @@ -92,6 +88,86 @@ If Kafka is part of your technical stack, you can also use the Kafka connector i For complete step-to-step guides about ingesting MySQL and PostgreSQL data using both approaches, see [Ingest data from MySQL](/docs/current/ingest-from-mysql-cdc/) and [Ingest data from PostgreSQL](/docs/current/ingest-from-postgres-cdc/). +## Shared source + +Shared source improves resource utilization and data consistency when working with Kafka sources in RisingWave. This will only affect Kafka sources created after the version updated and will not affect any existing Kafka sources. + + +**PUBLIC PREVIEW** + +This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). + + +### Configure + +Shared source is enabled by default. 
You can also set the session variable `streaming_use_shared_source` to control whether to enable it. + +```sql +# change the config in the current session +SET streaming_use_shared_source=[true|false]; + +# change the default value of the session variable in the cluster +# (the current session is not affected) +ALTER SYSTEM SET streaming_use_shared_source=[true|false]; +``` + +To disable it completely at the cluster level, edit the [`risingwave.toml`](https://github.com/risingwavelabs/risingwave/blob/main/src/config/example.toml#L146) configuration file and set `stream_enable_shared_source` to `false`. + +### Compared with non-shared source + +With non-shared sources, when using the `CREATE SOURCE` statement: +- No streaming jobs would be instantiated. A source is just a set of metadata stored in the catalog. +- A `SourceExecutor` is created to start data ingestion only when a materialized view or sink references the source. + +This leads to increased resource usage and potential inconsistencies: +- Each `SourceExecutor` consumes Kafka resources independently, adding pressure to both the Kafka broker and RisingWave. +- Independent `SourceExecutor` instances could result in different consumption progress, causing temporary inconsistencies when joining materialized views. + + + + + +With shared sources, when using the `CREATE SOURCE` statement: +- A single `SourceExecutor` is instantiated immediately. +- All materialized views referencing the same source share the `SourceExecutor`. +- The downstream materialized views only forward data from the upstream source, instead of consuming from Kafka independently. + +This improves resource utilization and consistency. + + + + + +When creating a materialized view, RisingWave backfills historical data from Kafka. The process blocks the DDL statement until backfill completes. + +- To configure this behavior, use the [SET BACKGROUND_DDL](/sql/commands/sql-set-background-ddl) command. This is similar to the backfilling procedure when creating a materialized view on tables and materialized views. + +- To monitor backfill progress, use the [SHOW JOBS](/sql/commands/sql-show-jobs) command or check `Kafka Consumer Lag Size` in the Grafana dashboard (under `Streaming`). + + +If you set up a retention policy, or if the external system can only be accessed once (like message queues) and the data is no longer available, any newly created materialized views won't be able to backfill the complete historical data. This can lead to inconsistencies with earlier materialized views. + + +### Compared with table + +A `CREATE TABLE` statement can provide similar benefits to shared sources, except that it needs to persist all consumed data. + +For a table with a connector, downstream materialized views backfill historical data from the table instead of the external source, which may be more efficient and puts less pressure on the external system. This also gives tables a stronger consistency guarantee, as historical data is guaranteed to be present. + +Tables offer other features that enhance their utility in data ingestion workflows. See [Table with connectors](/ingestion/overview#table-with-connectors). + + + + + + +**LIMITATION** + +Currently, shared source is only applicable to Kafka sources. Other sources are unaffected. We plan to gradually upgrade other sources to be shared as well in the future. + +Shared sources do not support `ALTER SOURCE`. Use non-shared sources if you require this functionality.
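To make the sharing behavior concrete, here is a minimal sketch, assuming a hypothetical Kafka topic `orders` and broker address `message_queue:29092` (placeholder values, not part of the page above):

```sql
-- Placeholder topic, broker, and schema for illustration only.
CREATE SOURCE orders_src (order_id INT, amount DOUBLE PRECISION)
WITH (
    connector = 'kafka',
    topic = 'orders',
    properties.bootstrap.server = 'message_queue:29092'
) FORMAT PLAIN ENCODE JSON;

-- Both materialized views reuse the single SourceExecutor created above;
-- each one backfills historical data from Kafka when it is created.
SET BACKGROUND_DDL = true;
CREATE MATERIALIZED VIEW order_counts AS SELECT count(*) AS cnt FROM orders_src;
CREATE MATERIALIZED VIEW order_totals AS SELECT sum(amount) AS total FROM orders_src;

-- Check the progress of the background backfill jobs.
SHOW JOBS;
```

Because both materialized views consume the same shared stream, their consumption progress stays aligned rather than drifting apart as it could with non-shared sources.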
+ + ## See also diff --git a/sql/commands/sql-create-table.mdx b/sql/commands/sql-create-table.mdx index a9336a81..0a7676b5 100644 --- a/sql/commands/sql-create-table.mdx +++ b/sql/commands/sql-create-table.mdx @@ -6,7 +6,7 @@ description: "Use the `CREATE TABLE` command to create a new table. Tables consi Rows can be added using the [INSERT](/docs/current/sql-insert/) command. When creating a table, you can specify connector settings and data format. -**INFO** + If you choose not to persist the data from the source in RisingWave, use [CREATE SOURCE](/docs/current/sql-create-source/) instead. For more details about the differences between sources and tables, see [here](/docs/current/data-ingestion/#table-with-connectors). @@ -34,40 +34,40 @@ CREATE TABLE [ IF NOT EXISTS ] table_name ( ## Notes -For tables with primary key constraints, if you insert a new data record with an existing key, the new record will overwrite the existing record. +- For tables with primary key constraints, if you insert a new data record with an existing key, the new record will overwrite the existing record. -A [generated column](/docs/current/query-syntax-generated-columns/) that is defined with non-deterministic functions cannot be specified as part of the primary key. For example, if `A1` is defined as `current_timestamp()`, then it cannot be part of the primary key. +- A [generated column](/docs/current/query-syntax-generated-columns/) that is defined with non-deterministic functions cannot be specified as part of the primary key. For example, if `A1` is defined as `current_timestamp()`, then it cannot be part of the primary key. -Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/). +- Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/). -The syntax for creating a table with connector settings and the supported connectors are the same as for creating a source. See [CREATE SOURCE](/docs/current/sql-create-source/) for a full list of supported connectors and data formats. +- The syntax for creating a table with connector settings and the supported connectors are the same as for creating a source. See [CREATE SOURCE](/docs/current/sql-create-source/) for a full list of supported connectors and data formats. -To know when a data record is loaded to RisingWave, you can define a column that is generated based on the processing time (` timestamptz AS proctime()`) when creating the table or source. See also [proctime()](/docs/current/sql-function-datetime/#proctime). +- To know when a data record is loaded to RisingWave, you can define a column that is generated based on the processing time (` timestamptz AS proctime()`) when creating the table or source. See also [proctime()](/docs/current/sql-function-datetime/#proctime). -For a table with schema from external connector, use `*` to represent all columns from the external connector first, so that you can define a generated column on table with an external connector. See the example below. +- For a table with schema from external connector, use `*` to represent all columns from the external connector first, so that you can define a generated column on table with an external connector. 
See the example below -```js -CREATE TABLE from_kafka ( - *, - gen_i32_field INT AS int32_field + 2, - PRIMARY KEY (some_key) -) -INCLUDE KEY AS some_key -[INCLUDE { header | offset | partition | timestamp } [AS ]] -WITH ( - connector = 'kafka', - topic = 'test-rw-sink-upsert-avro', - properties.bootstrap.server = 'message_queue:29092' -) -FORMAT upsert ENCODE AVRO ( - schema.registry = 'http://message_queue:8081' -); -``` + ```sql + CREATE TABLE from_kafka ( + *, + gen_i32_field INT AS int32_field + 2, + PRIMARY KEY (some_key) + ) + INCLUDE KEY AS some_key + [INCLUDE { header | offset | partition | timestamp } [AS ]] + WITH ( + connector = 'kafka', + topic = 'test-rw-sink-upsert-avro', + properties.bootstrap.server = 'message_queue:29092' + ) + FORMAT upsert ENCODE AVRO ( + schema.registry = 'http://message_queue:8081' + ); + ``` ## Parameters -| Parameter or clause | Description | -| --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Parameter or clause | Description | +| :-------------------------------- | :------------ | | table\_name | The name of the table. If a schema name is given (for example, CREATE TABLE \.\ ...), then the table is created in the specified schema. Otherwise it is created in the current schema. | | col\_name | The name of a column. | | data\_type | The data type of a column. With the struct data type, you can create a nested table. Elements in a nested table need to be enclosed with angle brackets (\<>). | @@ -80,11 +80,9 @@ FORMAT upsert ENCODE AVRO ( | **WITH** clause | Specify the connector settings here if trying to store all the source data. See the [Data ingestion](/docs/current/data-ingestion/) page for the full list of supported source as well as links to specific connector pages detailing the syntax for each source. | | **FORMAT** and **ENCODE** options | Specify the data format and the encoding format of the source data. To learn about the supported data formats, see [Data formats](/docs/current/sql-create-source/#supported-formats). | - -**NOTE** - + Please distinguish between the parameters set in the FORMAT and ENCODE options and those set in the WITH clause. Ensure that you place them correctly and avoid any misuse. - + ## Watermarks @@ -106,18 +104,14 @@ The action could one of the following. A column not in the primary key can be sp `VERSION COLUMN` is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage). - -**NOTE** - + The delete and update operation on the table cannot break the primary key constraint on the table, so the option will not take effect for those cases. - - - -**NOTE** + + When `DO UPDATE IF NOT NULL` behavior is applied, `DEFAULT` clause is not allowed on the table's columns. - + ## Example The statement below creates a table that has three columns. 
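As a further illustration of the notes above (primary-key overwrite behavior and the processing-time generated column), here is a minimal sketch using hypothetical table and column names (`orders_demo`, `loaded_at`):

```sql
-- Hypothetical table for illustration only.
CREATE TABLE orders_demo (
    order_id INT PRIMARY KEY,
    item VARCHAR,
    amount DOUBLE PRECISION,
    -- Generated column recording when each row was loaded into RisingWave.
    loaded_at TIMESTAMPTZ AS proctime()
);

-- With a primary key, inserting a row whose key already exists
-- overwrites the existing row.
INSERT INTO orders_demo (order_id, item, amount) VALUES (1, 'book', 10.0);
INSERT INTO orders_demo (order_id, item, amount) VALUES (1, 'book', 12.5);
FLUSH;

SELECT * FROM orders_demo; -- one row for order_id = 1, with amount 12.5
```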
diff --git a/sql/commands/sql-create-user.mdx b/sql/commands/sql-create-user.mdx index 595c468c..d4767418 100644 --- a/sql/commands/sql-create-user.mdx +++ b/sql/commands/sql-create-user.mdx @@ -14,7 +14,7 @@ If you do not want password authentication for the user, omit the PASSWORD optio Below are the options for system permissions. | Option | Description | -| ------------ | --------------------------------------------------------------------------------------------------------------------------- | +| :----------- | :-------------------------------------------------------------------------------------------------------------------------- | | SUPERUSER | Grants the user superuser permission. A superuser can override all access restrictions. NOSUPERUSER is the default value. | | NOSUPERUSER | Denies the user superuser permission. A superuser can override all access restrictions. NOSUPERUSER is the default value. | | CREATEDB | Grants the user the permission to create databases. NOCREATEDB is the default value. | @@ -37,11 +37,9 @@ CREATE USER user_name WITH oauth ( The `jwks_url` and `issuer` parameters are mandatory. On the other hand, `other_params_should_match` is an optional parameter that will be validated against `jwt.claims`. Please ensure that all keys in the options are in **lowercase**. - -**NOTE** - + `kid` and `alg` are required in the header of JWT, and `kid` is also required in the JWKs returned by the JWKS server. All parameters set in user creation (except `jwks_url`) will be checked in the claims of JWT. Any mismatch will deny the login process. - + ## Examples @@ -55,8 +53,6 @@ CREATE USER user1 ``` -**TIP** - You can connect to RisingWave with the newly created user account. @@ -77,11 +73,9 @@ psql -h localhost -p 4566 -d dev -U user1 Enter the password to log in. - -**NOTE** - + Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/). - + ### Create a user with OAuth authentication diff --git a/sql/commands/sql-create-view.mdx b/sql/commands/sql-create-view.mdx index d80cdd41..c798bbed 100644 --- a/sql/commands/sql-create-view.mdx +++ b/sql/commands/sql-create-view.mdx @@ -11,7 +11,7 @@ CREATE VIEW [IF NOT EXISTS] view_name [ ( column_name [, ...] ) ] AS select_quer ## Parameters | Parameter | Description | -| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------- | :------------------------------------------------------------------------------------------------------------------------------------------ | | _mv\_name_ | The name of the view to be created. | | _column\_name_ | Specify the columns of the view. | | _select\_query_ | A SELECT query that retrieves data for the view. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. | @@ -63,8 +63,6 @@ SELECT * FROM v3; 601 | 3 | 0zsMbNLxQh9yYtHh ``` - -**NOTE** - + Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/). - + diff --git a/sql/commands/sql-delete.mdx b/sql/commands/sql-delete.mdx index 8d26496c..8f355b3b 100644 --- a/sql/commands/sql-delete.mdx +++ b/sql/commands/sql-delete.mdx @@ -4,7 +4,7 @@ description: "Use the `DELETE` command to permanently remove rows from a table." 
--- -**INFO** + Call [FLUSH](/docs/current/sql-flush/) after `DELETE` to persist the changes to storage. This ensures that the changes are committed and visible for subsequent reads. @@ -19,7 +19,7 @@ WHERE condition ## Parameters | Parameter or clause | Description | -| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _table\_name_ | The table where you want to remove records. | | **WHERE** _condition_ | Specify which rows you want to remove using an expression that returns a boolean value. Rows for which this expression returns true will be removed. If you omit the WHERE clause, all rows of records in the table will be deleted but the table structure will be kept. | | **RETURNING** | Returns the values of any column based on each deleted row. | diff --git a/sql/commands/sql-describe.mdx b/sql/commands/sql-describe.mdx index 001a7851..b5155b49 100644 --- a/sql/commands/sql-describe.mdx +++ b/sql/commands/sql-describe.mdx @@ -6,8 +6,6 @@ description: "Use the `DESCRIBE` command to view columns in the specified table, `DESCRIBE` is a shortcut for [SHOW COLUMNS](/docs/current/sql-show-columns/). -**TIP** - `DESCRIBE` also lists the indexes on a table or materialized view, whereas `SHOW COLUMNS` doesn't. @@ -20,7 +18,7 @@ DESCRIBE relation_name; ## Parameters | Parameter or clause | Description | -| ------------------- | -------------------------------------------------------------------------------- | +| :------------------ | :------------------------------------------------------------------------------- | | _relation\_name_ | The table, source, sink, view or materialized view whose columns will be listed. | ## Examples @@ -46,7 +44,7 @@ DESCRIBE customers; ```bash | Name | Type | Is Hidden | Description | -| ------------------- | --------------------------------------------------------------------- | --------- | ----------------------------------- | +| :------------------ | :-------------------------------------------------------------------- | :-------- | :---------------------------------- | | customer_id | bigint | false | Unique identifier for each customer | | name | character varying | false | Name of the customer | | email | character varying | false | Email address of the customer | diff --git a/sql/commands/sql-discard.mdx b/sql/commands/sql-discard.mdx index 22a36f97..0553080b 100644 --- a/sql/commands/sql-discard.mdx +++ b/sql/commands/sql-discard.mdx @@ -12,5 +12,5 @@ DISCARD ALL; ## Parameter | Parameter | Description | -| --------- | -------------------------------------------------------------------------------------------- | +| :-------- | :------------------------------------------------------------------------------------------- | | All | Since RisingWave doesn't support temporary object, this command will essentially do nothing. 
| diff --git a/sql/commands/sql-drop-aggregate.mdx b/sql/commands/sql-drop-aggregate.mdx index 43f8b237..1c64c6d6 100644 --- a/sql/commands/sql-drop-aggregate.mdx +++ b/sql/commands/sql-drop-aggregate.mdx @@ -10,7 +10,7 @@ DROP AGGREGATE [ IF EXISTS ] function_name [ ( argument_type [, ...] ) ] ; ``` | Parameter or clause | Description | -| -------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------- | | _function\_name_ | Name of the UDAF you want to drop. | | ( _argument\_type_ \[ , ... \] ) | Optional: Argument types of the function.Specify the argument types when the name of the function you want to drop isn't unique within the schema. | | IF EXISTS | Do not return an error if the specified function does not exist. A notice is issued in this case. | diff --git a/sql/commands/sql-drop-connection.mdx b/sql/commands/sql-drop-connection.mdx index f285d67c..df9f6ab1 100644 --- a/sql/commands/sql-drop-connection.mdx +++ b/sql/commands/sql-drop-connection.mdx @@ -13,7 +13,7 @@ DROP CONNECTION [ IF EXISTS ] connection_name; ## Parameters | Parameter or clause | Description | -| ------------------- | ----------------------------------------- | +| :------------------ | :---------------------------------------- | | _connection\_name_ | The name of the connection to be removed. | ## Examples diff --git a/sql/commands/sql-drop-database.mdx b/sql/commands/sql-drop-database.mdx index a46d5d31..eb75c529 100644 --- a/sql/commands/sql-drop-database.mdx +++ b/sql/commands/sql-drop-database.mdx @@ -6,7 +6,7 @@ description: "Use the `DROP DATABASE` command to remove a database from your Ris Before you can remove a database, you must use [DROP SCHEMA](/docs/current/sql-drop-schema/) to remove all its dependent schemas. -**CAUTION** + `DROP DATABASE` removes all data in a database and cannot be undone. @@ -20,7 +20,7 @@ DROP DATABASE [ IF EXISTS ] database_name; ## Parameters | Parameter or clause | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------- | | **IF EXISTS** clause | Do not return an error if the specified database does not exist. | | _database\_name_ | The name of the database you want to remove. You can use [SHOW DATABASES](/docs/current/sql-show-databases/) to get a list of all available databases. | diff --git a/sql/commands/sql-drop-function.mdx b/sql/commands/sql-drop-function.mdx index 9d8743ef..b74b50e4 100644 --- a/sql/commands/sql-drop-function.mdx +++ b/sql/commands/sql-drop-function.mdx @@ -11,7 +11,7 @@ DROP FUNCTION [ IF EXISTS ] function_name [ ( argument_type [, ...] 
) ] ; ``` | Parameter or clause | Description | -| -------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------- | | _function\_name_ | Name of the UDF you want to drop. | | ( _argument\_type_ \[ , ... \] ) | Optional: Argument types of the function.Specify the argument types when the name of the function you want to drop isn't unique within the schema. | | IF EXISTS | Do not return an error if the specified function does not exist. A notice is issued in this case. | @@ -31,8 +31,6 @@ DROP FUNCTION function_name; You can run [SHOW FUNCTIONS;](/docs/current/sql-show-functions/) to list all existing UDFs to see if a function name is unique. -**TIP** - `DROP FUNCTION function_name();` drops a function with zero arguments. `DROP FUNCTION function_name;` drops a function with any number of arguments, including zero, as long as the name is unique. diff --git a/sql/commands/sql-drop-index.mdx b/sql/commands/sql-drop-index.mdx index 9e1d529c..e23c54c9 100644 --- a/sql/commands/sql-drop-index.mdx +++ b/sql/commands/sql-drop-index.mdx @@ -12,7 +12,7 @@ DROP INDEX [ IF EXISTS ] [ schema_name.]index_name [ CASCADE ]; ## Parameters | Parameter | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **IF EXISTS** clause | Do not return an error if the specified index does not exist. | | _schema\_name_ | The schema of the index that you want to remove. You can use [SHOW SCHEMAS](/docs/current/sql-show-schemas/) to get a list of all available schemas. If you don't specify a schema, the specified index in the default schema public will be removed. | | _index\_name_ | The name of the index to remove. You can use [DESCRIBE](/docs/current/sql-describe/) to show the indexes of a table. | diff --git a/sql/commands/sql-drop-mv.mdx b/sql/commands/sql-drop-mv.mdx index 8e5e9d09..4030de15 100644 --- a/sql/commands/sql-drop-mv.mdx +++ b/sql/commands/sql-drop-mv.mdx @@ -14,7 +14,7 @@ DROP MATERIALIZED VIEW [ IF EXISTS ] [schema_name.]mv_name [ CASCADE ]; ## Parameters | Parameter | Description | -| ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _schema\_name_ | Specify the name of a schema to remove the materialized view in that schema. 
You can use [SHOW SCHEMAS](/docs/current/sql-show-schemas/) to get a list of all available schemas. If you don't specify a schema, the specified materialized view in the default schema public will be removed. | | _mv\_name_ | The name of the materialized view to remove. You can use [SHOW MATERIALIZED VIEWS](/docs/current/sql-show-mv/) to get a list of all available materialized views. | | **CASCADE** option | If this option is specified, all objects (such as other materialized views or regular views) that depend on the materialized view, and in turn all objects that depend on those objects will be dropped. | diff --git a/sql/commands/sql-drop-schema.mdx b/sql/commands/sql-drop-schema.mdx index 4e09ca8f..95d8a729 100644 --- a/sql/commands/sql-drop-schema.mdx +++ b/sql/commands/sql-drop-schema.mdx @@ -14,7 +14,7 @@ DROP SCHEMA [ IF EXISTS ] [database_name.]schema_name; ## Parameters | Parameter or clause | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **IF EXISTS** clause | Do not return an error if the specified schema does not exist. | | _database_ | Specify the name of a database to remove the schema in that database. You can use [SHOW DATABASES](/docs/current/sql-show-databases/) to get a list of all available databases. If you don't specify a database, the specified schema in the default database will be removed. | | _schema_ | The name of the schema you want to remove. The default schema is public. You can use [SHOW SCHEMAS](/docs/current/sql-show-schemas/) to get a list of all available schemas. | diff --git a/sql/commands/sql-drop-secret.mdx b/sql/commands/sql-drop-secret.mdx index 7cd04ecb..709a8f00 100644 --- a/sql/commands/sql-drop-secret.mdx +++ b/sql/commands/sql-drop-secret.mdx @@ -12,7 +12,7 @@ DROP SECRET secret_name; ## Parameters | Parameter or Clause | Description | -| ------------------- | ------------------------------------- | +| :------------------ | :------------------------------------ | | _secret\_name_ | The name of the secret to be dropped. | ## Examples diff --git a/sql/commands/sql-drop-sink.mdx b/sql/commands/sql-drop-sink.mdx index 1f7a91d8..5bdcaaa8 100644 --- a/sql/commands/sql-drop-sink.mdx +++ b/sql/commands/sql-drop-sink.mdx @@ -12,7 +12,7 @@ DROP SINK [ IF EXISTS ] [schema_name.]sink_name [ CASCADE ]; ## Parameters | Parameter | Description | -| ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _schema\_name_ | The schema of the sink that you want to remove. 
You can use [SHOW SCHEMAS](/docs/current/sql-show-schemas/) to get a list of all available schemas. If you don't specify a schema, the specified sink in the default schema public will be removed. | | _sink\_name_ | The name of the sink to remove. | | **CASCADE** option | If this option is specified, all objects (such as materialized views) that depend on the sink, and in turn all objects that depend on those objects will be dropped. | diff --git a/sql/commands/sql-drop-source.mdx b/sql/commands/sql-drop-source.mdx index c010dbfc..d3b0a47a 100644 --- a/sql/commands/sql-drop-source.mdx +++ b/sql/commands/sql-drop-source.mdx @@ -14,7 +14,7 @@ DROP SOURCE [ IF EXISTS ] [schema_name.]source_name [ CASCADE ]; ## Parameters[](#parameters "Direct link to Parameters") | Parameter | Description | -| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | _schema\_name_ | The schema of the source that you want to remove. You can use [SHOW SCHEMAS](/docs/current/sql-show-schemas/) to get a list of all available schemas. If you don't specify a schema, the specified source in the default schema public will be removed. | | _source\_name_ | The name of the source to remove. | | **CASCADE** option | If this option is specified, all objects (such as materialized views) that depend on the source, and in turn all objects that depend on those objects will be dropped. | diff --git a/sql/commands/sql-drop-table.mdx b/sql/commands/sql-drop-table.mdx index 4cbc9eb8..a723480e 100644 --- a/sql/commands/sql-drop-table.mdx +++ b/sql/commands/sql-drop-table.mdx @@ -13,7 +13,7 @@ DROP TABLE [ IF EXISTS ] [schema_name.]table_name [ CASCADE ]; ## Parameters | Parameter | Description | -| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _schema_ | Specify the name of a schema to remove the table in that schema. You can use [SHOW SCHEMAS](/docs/current/sql-show-schemas/) to get a list of all available schemas. If you don't specify a schema, the specified source in the default schema public will be removed. | | _table_ | The name of the table to remove. You can use [SHOW TABLES](/docs/current/sql-show-tables/) to get a list of all available tables. | | **CASCADE** option | If this option is specified, all objects (such as materialized views) that depend on the table, and in turn all objects that depend on those objects will be dropped. 
| diff --git a/sql/commands/sql-drop-user.mdx b/sql/commands/sql-drop-user.mdx index 8d41d173..7cee9852 100644 --- a/sql/commands/sql-drop-user.mdx +++ b/sql/commands/sql-drop-user.mdx @@ -12,7 +12,7 @@ DROP USER [ IF EXISTS ] user_name [ , ... ]; ## Parameters | Parameter | Description | -| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **IF EXISTS** | Do not return an error if the specified user does not exist. | | _user\_name_ | The user you want to drop. \- You cannot drop the current user; \- To drop a superuser (user with the SUPERUSER privilege), you must be a superuser yourself; \- To drop a non-superuser, you must have the CREATEUSER privilege. | diff --git a/sql/commands/sql-drop-view.mdx b/sql/commands/sql-drop-view.mdx index 84140dfe..ceb08e28 100644 --- a/sql/commands/sql-drop-view.mdx +++ b/sql/commands/sql-drop-view.mdx @@ -12,7 +12,7 @@ DROP VIEW [ IF EXISTS ] view_name [ CASCADE ]; ## Parameters | Parameter | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | **IF EXISTS** clause | Do not return an error if the specified view does not exist. | | _view\_name_ | Name of the view to be dropped. | | **CASCADE** option | If this option is specified, all objects (such as materialized views or other regular views) that depend on the view, and in turn all objects that depend on those objects will be dropped. | diff --git a/sql/commands/sql-explain.mdx b/sql/commands/sql-explain.mdx index 942da6e1..9415de4d 100644 --- a/sql/commands/sql-explain.mdx +++ b/sql/commands/sql-explain.mdx @@ -12,23 +12,21 @@ EXPLAIN [ ( option [ , ... ] ) ] statement; ## Parameters | Parameter | Description | -| -------------------- | --------------------------------------------- | +| :------------------- | :-------------------------------------------- | | _statement_ | A statement that is executable in RisingWave. | | **EXPLAIN** _option_ | See the table below. 
| #### `EXPLAIN` options | Option | Description | | -| ------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------------------ | :------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **VERBOSE** \[ TRUE \| FALSE \] | Show additional information regarding the execution plan such as the table catalog of the state table and the schema of each operator. | | | **TRACE** \[ TRUE \| FALSE \] | Show the trace of each optimization stage, not only the final plan. | | | **TYPE** \[ PHYSICAL \| LOGICAL | DISTSQL \] | Show the execution plan of a specific phase.PHYSICAL — Show the batch plan or stream plan.LOGICAL — Show the optimized logical plan.DISTSQL — Show the distributed query plan for batch or stream. | - -**NOTE** - + The boolean parameter `[ TRUE | FALSE ]` specifies whether the specified option should be enabled or disabled. Use `TRUE` to enable the option, and `FALSE` to disable it. It defaults to `TRUE` if the parameter is not specified. - + ## Examples diff --git a/sql/commands/sql-grant.mdx b/sql/commands/sql-grant.mdx index d102b8e6..21bfdd77 100644 --- a/sql/commands/sql-grant.mdx +++ b/sql/commands/sql-grant.mdx @@ -54,7 +54,7 @@ TO user_name [WITH GRANT OPTION] [GRANTED BY user_name]; ## Parameters | Parameter or clause | Description | -| ---------------------------- | -------------------------------------------------------------------------------------------------------------- | +| :--------------------------- | :------------------------------------------------------------------------------------------------------------- | | **WITH GRANT OPTION** clause | The WITH GRANT OPTION clause allows the grantee to grant the privilege to other users. | | **GRANTED BY** clause | The specified user after the GRANTED BY clause must be the current user. By default, the current user is root. | diff --git a/sql/commands/sql-insert.mdx b/sql/commands/sql-insert.mdx index c958a0fa..df132c46 100644 --- a/sql/commands/sql-insert.mdx +++ b/sql/commands/sql-insert.mdx @@ -4,7 +4,7 @@ description: "Use the `INSERT` command to insert new rows into an existing table --- -**INFO** + * For tables with primary keys, if you insert a row with an existing key, the new row will overwrite the existing row. * Call [FLUSH](/docs/current/sql-flush/) after `INSERT` to persist the changes to storage. This ensures that the changes are committed and visible for subsequent reads. @@ -21,7 +21,7 @@ INSERT INTO table_name [ ( col_name [ , ... 
] ) ] ## Parameters | Parameter or clause | Description | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _table\_name_ | The table where you want to insert rows. | | _col\_name_ | The column where you want to insert corresponding values. Currently, you must provide all columns in the table in order or leave this field empty. | | _value_ | An expression or value to assign to the corresponding column. You can use [DESCRIBE](/docs/current/sql-describe/) to check the order of the columns in the table. | diff --git a/sql/commands/sql-revoke.mdx b/sql/commands/sql-revoke.mdx index 6e3101b9..2ee3a023 100644 --- a/sql/commands/sql-revoke.mdx +++ b/sql/commands/sql-revoke.mdx @@ -51,7 +51,7 @@ FROM user_name [GRANTED BY user_name]; ## Parameters | Parameter or clause | Description | -| --------------------- | -------------------------------------------------------------------------------------------------------------- | +| :-------------------- | :------------------------------------------------------------------------------------------------------------- | | **GRANTED BY** clause | The specified user after the GRANTED BY clause must be the current user. By default, the current user is root. | ## Example diff --git a/sql/commands/sql-select.mdx b/sql/commands/sql-select.mdx index 3e8bcd08..5ae2ba06 100644 --- a/sql/commands/sql-select.mdx +++ b/sql/commands/sql-select.mdx @@ -30,7 +30,7 @@ Where `from_item` can be: ## Parameters | Parameter or clause | Description | -| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :--------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **WITH** clause | Provides a way to write supplemental statements for a larger query. For more information, see [WITH clause](/docs/current/query-syntax-with-clause/). 
| | **DISTINCT** clause | This clause eliminates duplicate rows from the result. SELECT DISTINCT eliminates duplicate rows based on **all selected columns**. SELECT DISTINCT ON allows you to specify expressions or columns and returns only the first row for each unique combination. It requires the use of the ORDER BY clause to determine the first row, and the DISTINCT ON expression must match the leftmost ORDER BY expression. The ORDER BY clause will normally contain additional expressions that determine the desired precedence of rows within each DISTINCT ON group. In this case, this expression can be an alternative with group [topN](/docs/current/sql-pattern-topn/) when "N=1". See [examples of this clause](#distinct-clause) below to know more about it. | | **EXCEPT** clause | Exclude one or more columns from the result set. By specifying _except\_column_, the query will return all columns in the result set except those specified. | diff --git a/sql/commands/sql-set-time-zone.mdx b/sql/commands/sql-set-time-zone.mdx index 906e64d8..7748f5cf 100644 --- a/sql/commands/sql-set-time-zone.mdx +++ b/sql/commands/sql-set-time-zone.mdx @@ -11,7 +11,7 @@ SET TIME ZONE { time_zone | LOCAL | DEFAULT }; ## Parameters | Parameter | Description | -| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _time\_zone_ | Specifies the time zone using a valid time zone name, such as "America/New\_York" or "Asia/Shanghai". You can find a list of all possible _time\_zone_ values [here](https://en.wikipedia.org/wiki/List%5Fof%5Ftz%5Fdatabase%5Ftime%5Fzones) | | **LOCAL** | Sets the time zone to the system's local time zone. | | **DEFAULT** | Sets the time zone to the server's default time zone. | diff --git a/sql/commands/sql-set.mdx b/sql/commands/sql-set.mdx index 53f7b73a..f92d3b76 100644 --- a/sql/commands/sql-set.mdx +++ b/sql/commands/sql-set.mdx @@ -12,6 +12,6 @@ SET parameter_name { TO | = } { value | 'value' | DEFAULT}; ## Parameters | Parameter or clause | Description | -| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _parameter\_name_ | Name of the runtime parameters. | | _value_ | New value of parameter. Values can be specified as string constants, identifiers, numbers, or comma-separated lists of these, as appropriate for the particular parameter. 
DEFAULT can be written to specify resetting the parameter to its default value (that is, whatever value it would have had if no SET had been executed in the current session). | diff --git a/sql/commands/sql-show-columns.mdx b/sql/commands/sql-show-columns.mdx index afdc6565..7a230897 100644 --- a/sql/commands/sql-show-columns.mdx +++ b/sql/commands/sql-show-columns.mdx @@ -13,7 +13,7 @@ SHOW COLUMNS FROM relation_name [ LIKE_expression ]; ## Parameters | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _relation\_name_ | The name of the table, source, sink, view, or materialized view from which the columns will be listed. | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). | diff --git a/sql/commands/sql-show-connections.mdx b/sql/commands/sql-show-connections.mdx index 59046786..e09223ac 100644 --- a/sql/commands/sql-show-connections.mdx +++ b/sql/commands/sql-show-connections.mdx @@ -12,7 +12,7 @@ SHOW CONNECTIONS [ LIKE_expression ]; ## Parameters | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). | ## Example diff --git a/sql/commands/sql-show-create-index.mdx b/sql/commands/sql-show-create-index.mdx index 6455093c..2d644f10 100644 --- a/sql/commands/sql-show-create-index.mdx +++ b/sql/commands/sql-show-create-index.mdx @@ -12,7 +12,7 @@ SHOW CREATE INDEX index_name; ## Parameters | Parameter | Description | -| ------------- | ------------------------------- | +| :------------ | :------------------------------ | | _index\_name_ | The index to show the query of. | ## Example diff --git a/sql/commands/sql-show-create-mv.mdx b/sql/commands/sql-show-create-mv.mdx index a1fb2868..83117e27 100644 --- a/sql/commands/sql-show-create-mv.mdx +++ b/sql/commands/sql-show-create-mv.mdx @@ -12,7 +12,7 @@ SHOW CREATE MATERIALIZED VIEW mv_name; ## Parameters | Parameter | Description | -| ---------- | ------------------------------------------- | +| :--------- | :------------------------------------------ | | _mv\_name_ | The materialized view to show the query of. 
| ## Example diff --git a/sql/commands/sql-show-create-sink.mdx b/sql/commands/sql-show-create-sink.mdx index 2bc34711..fd3cbd45 100644 --- a/sql/commands/sql-show-create-sink.mdx +++ b/sql/commands/sql-show-create-sink.mdx @@ -12,7 +12,7 @@ SHOW CREATE SINK sink_name; ## Parameters | Parameter | Description | -| ------------ | -------------------------------------------------------------------- | +| :----------- | :------------------------------------------------------------------- | | _sink\_name_ | The sink for which you want to show the corresponding SQL statement. | ## See also diff --git a/sql/commands/sql-show-create-source.mdx b/sql/commands/sql-show-create-source.mdx index c98c6c57..129451b2 100644 --- a/sql/commands/sql-show-create-source.mdx +++ b/sql/commands/sql-show-create-source.mdx @@ -12,7 +12,7 @@ SHOW CREATE SOURCE source_name; ## Parameters | Parameter | Description | -| -------------- | ---------------------------------------------------------------------- | +| :------------- | :--------------------------------------------------------------------- | | _source\_name_ | The source for which you want to show the corresponding SQL statement. | ## See also diff --git a/sql/commands/sql-show-create-table.mdx b/sql/commands/sql-show-create-table.mdx index 02eda8ee..82ca8090 100644 --- a/sql/commands/sql-show-create-table.mdx +++ b/sql/commands/sql-show-create-table.mdx @@ -12,7 +12,7 @@ SHOW CREATE TABLE table_name; ## Parameters | Parameter | Description | -| ------------- | ------------------------------- | +| :------------ | :------------------------------ | | _table\_name_ | The table to show the query of. | ## Example diff --git a/sql/commands/sql-show-create-view.mdx b/sql/commands/sql-show-create-view.mdx index 3a2accd8..b440d367 100644 --- a/sql/commands/sql-show-create-view.mdx +++ b/sql/commands/sql-show-create-view.mdx @@ -12,7 +12,7 @@ SHOW CREATE VIEW view_name; ## Parameters | Parameter | Description | -| ------------ | ------------------------------ | +| :----------- | :----------------------------- | | _view\_name_ | The view to show the query of. | ## Example diff --git a/sql/commands/sql-show-databases.mdx b/sql/commands/sql-show-databases.mdx index 8088df4d..e2cf5efc 100644 --- a/sql/commands/sql-show-databases.mdx +++ b/sql/commands/sql-show-databases.mdx @@ -12,7 +12,7 @@ SHOW DATABASES [ LIKE_expression ]; ## Parameters | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). 
| ## Example diff --git a/sql/commands/sql-show-functions.mdx b/sql/commands/sql-show-functions.mdx index aa3095b7..47a1178d 100644 --- a/sql/commands/sql-show-functions.mdx +++ b/sql/commands/sql-show-functions.mdx @@ -12,7 +12,7 @@ SHOW FUNCTIONS [ LIKE_expression ]; ## Parameters | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). | ## Example diff --git a/sql/commands/sql-show-indexes.mdx b/sql/commands/sql-show-indexes.mdx index 3a64ce63..a819fdef 100644 --- a/sql/commands/sql-show-indexes.mdx +++ b/sql/commands/sql-show-indexes.mdx @@ -12,7 +12,7 @@ SHOW INDEXES FROM table_name [ LIKE_expression ]; ## Parameters | Parameter | Description | -| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :--------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _table\_name_ | The table from which indexes will be displayed. | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). | diff --git a/sql/commands/sql-show-internal-tables.mdx b/sql/commands/sql-show-internal-tables.mdx index f7dbe16f..6b9fb58b 100644 --- a/sql/commands/sql-show-internal-tables.mdx +++ b/sql/commands/sql-show-internal-tables.mdx @@ -14,7 +14,7 @@ SHOW INTERNAL TABLES [ FROM schema_name ] [ LIKE_expression ]; ## Parameters | Parameter | Description | -| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :--------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _schema\_name_ | The schema in which tables will be listed. If not given, tables from the default schema, public, will be listed. | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). 
| diff --git a/sql/commands/sql-show-jobs.mdx b/sql/commands/sql-show-jobs.mdx index 4988b7f7..772fe428 100644 --- a/sql/commands/sql-show-jobs.mdx +++ b/sql/commands/sql-show-jobs.mdx @@ -14,7 +14,7 @@ SHOW JOBS [ LIKE_expression ]; ## Parameters | Parameter | Description | -| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :--------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). | ## Example @@ -34,7 +34,7 @@ SHOW JOBS; title="Monitor statement progress" icon="chart-line" iconType="solid" - href="/docs/current/view-statement-progress/" + href="/docs/current/monitor-statement-progress/" /> -**INFO** + This command only shows the frontend received processlist now. diff --git a/sql/commands/sql-show-schemas.mdx b/sql/commands/sql-show-schemas.mdx index 28e07130..8ca5182d 100644 --- a/sql/commands/sql-show-schemas.mdx +++ b/sql/commands/sql-show-schemas.mdx @@ -13,7 +13,7 @@ SHOW SCHEMAS [ LIKE_expression ]; ## Parameters | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). | ## Example diff --git a/sql/commands/sql-show-sinks.mdx b/sql/commands/sql-show-sinks.mdx index 7e1efcf5..747b1db0 100644 --- a/sql/commands/sql-show-sinks.mdx +++ b/sql/commands/sql-show-sinks.mdx @@ -6,13 +6,14 @@ description: "Use the `SHOW SINKS` command to return a list of all sinks." ## Syntax ```bash -SHOW SINKS [ LIKE_expression ]; +SHOW SINKS [ FROM schema_name ] [ LIKE_expression ]; ``` ## Parameters -| Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Clause | Description | +| :------------------ | :-------------------------- | +| schema\_name |The schema of the sinks to be listed.| | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). 
| ## Example diff --git a/sql/commands/sql-show-sources.mdx b/sql/commands/sql-show-sources.mdx index 1d615ecd..f425a44f 100644 --- a/sql/commands/sql-show-sources.mdx +++ b/sql/commands/sql-show-sources.mdx @@ -12,7 +12,7 @@ SHOW SOURCES [ FROM schema_name ] [ LIKE_expression ]; ## Parameters | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _schema\_name_ | The schema of the sources to be listed. The default schema is public. | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). | diff --git a/sql/commands/sql-show-tables.mdx b/sql/commands/sql-show-tables.mdx index 94142592..43e8c561 100644 --- a/sql/commands/sql-show-tables.mdx +++ b/sql/commands/sql-show-tables.mdx @@ -12,7 +12,7 @@ SHOW TABLES [ FROM schema_name ] [ LIKE_expression ]; ## Parameters | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _schema\_name_ | The schema in which tables will be listed. If not given, tables from the default schema, public, will be listed. | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). | diff --git a/sql/commands/sql-show-views.mdx b/sql/commands/sql-show-views.mdx index e73eda1b..0565354f 100644 --- a/sql/commands/sql-show-views.mdx +++ b/sql/commands/sql-show-views.mdx @@ -12,7 +12,7 @@ SHOW VIEWS [ FROM schema_name ] [ LIKE_expression ]; ## Parameters | Parameter or clause | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _schema\_name_ | The schema from which existing views will be listed. If not given, views from the default schema, "public", will be listed. | | LIKE\_expression | Filters the output based on names by applying pattern matching. See details in [LIKE pattern matching expressions](/docs/current/sql-function-string/#like-pattern-matching-expressions). 
| diff --git a/sql/commands/sql-update.mdx b/sql/commands/sql-update.mdx index 73b3e2c2..e85640b6 100644 --- a/sql/commands/sql-update.mdx +++ b/sql/commands/sql-update.mdx @@ -4,7 +4,7 @@ description: "Use the `UPDATE` command to modify values of existing rows in a ta --- -**INFO** + * `UPDATE` cannot modify data in the primary key column of a table. * Call [FLUSH](/docs/current/sql-flush/) after `UPDATE` to persist the changes to storage. This ensures that the changes are committed and visible for subsequent reads. @@ -21,7 +21,7 @@ UPDATE table_name ## Parameters | Parameter or clause | Description | -| ------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _table\_name_ | The table whose rows you want to update. | | **SET** _col\_name_ \= _value_ | Assign a value or result of an expression to a specific column._col\_name_ cannot be a primary key. | | **WHERE** _condition_ | Specify which rows you want to update using an expression that returns a boolean value. Rows for which this expression returns true will be updated. If you omit the WHERE clause, all rows in the table will be updated. Subqueries are supported in the condition expression. | diff --git a/sql/data-types/casting.mdx b/sql/data-types/casting.mdx index d1bb810c..1c7877e8 100644 --- a/sql/data-types/casting.mdx +++ b/sql/data-types/casting.mdx @@ -9,7 +9,7 @@ mode: wide * **Explicitly cast to**: Values can be converted to the target type using explicit [Type casts](/docs/current/query-syntax-value-exp/#type-casts). | From type | Implicitly cast to | Assigned to | Explicitly cast to | -| --- | --- | --- | --- | +| :-- | :-- | :-- | :-- | | **boolean** | | varchar | integer | | **smallint** | integer
bigint
numeric
real
double
rw\_int256 | varchar | | | **integer** | bigint
numeric
real
double
rw\_int256 | smallint | boolean | @@ -27,8 +27,6 @@ mode: wide | **jsonb** | boolean
smallint
integer
bigint
numeric
real
double | varchar | | | **rw\_int256** | | varchar | | - -**NOTE** - + Structs can be cast to structs explicitly or implicitly if the nested expressions and types can be cast. - +
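As a minimal illustration of the rules in the casting table above (a sketch, not an exhaustive reference), the statements below use the explicit casts listed there (`boolean` ↔ `integer`) and an implicit `smallint`-to-`integer` widening; `CAST(... AS type)` and the `::` shorthand are interchangeable:

```sql
-- Explicit casts: CAST(... AS type) or the :: shorthand.
SELECT CAST(true AS integer);  -- boolean explicitly cast to integer → 1
SELECT 0::boolean;             -- integer explicitly cast to boolean → f

-- Implicit cast: the smallint operand is widened to integer before the addition.
SELECT 1::smallint + 2;        -- → 3
```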
diff --git a/sql/data-types/overview.mdx b/sql/data-types/overview.mdx index 5087d414..2a01050d 100644 --- a/sql/data-types/overview.mdx +++ b/sql/data-types/overview.mdx @@ -6,7 +6,7 @@ sidebarTitle: Overview | Type | Aliases | Description | Value | -| --- | --- | --- | --- | +| :-- | :-- | :-- | :-- | | boolean | bool | Logical Boolean.
It follows a three-valued logic system (true, false, or null). | true, false, or null | | smallint | | Two-byte integer | Range: -32768 to 32767 | | integer | int | Four-byte integer | Range: -2147483648 to 2147483647 | @@ -26,11 +26,9 @@ sidebarTitle: Overview | map | | A map contains key-value pairs. | For syntax and examples, see [Map](/docs/current/data-type-map/). | | JSONB | | A (binary) JSON value that ignores semantically-insignificant whitespaces or order of object keys. | For syntax and examples, see [JSONB](/docs/current/data-type-jsonb/). | - -**NOTE** - + Scientific notation (e.g., 1e6, 1.25e5, and 1e-4) is supported in SELECT and INSERT statements. - + ## Casting diff --git a/sql/data-types/rw-int256.mdx b/sql/data-types/rw-int256.mdx index 38ca7938..6b4227d9 100644 --- a/sql/data-types/rw-int256.mdx +++ b/sql/data-types/rw-int256.mdx @@ -2,11 +2,9 @@ title: "rw_int256" --- - -**NOTE** - + `rw_int256` values can be very large, and therefore require more memory and processing power compared to smaller data types. - + ## Overview diff --git a/sql/data-types/supported-protobuf-types.mdx b/sql/data-types/supported-protobuf-types.mdx index 3a483167..8dda2bbb 100644 --- a/sql/data-types/supported-protobuf-types.mdx +++ b/sql/data-types/supported-protobuf-types.mdx @@ -8,7 +8,7 @@ description: "RisingWave supports a variety of protobuf data types, which are co RisingWave converts [well-known types](https://protobuf.dev/reference/protobuf/google.protobuf/) from the protobuf library to specific types in RisingWave. The conversion is as follows: | Protobuf type | RisingWave type | -| --- | --- | +| :-- | :-- | | any | JSONB | | double | double precision | | float | real | @@ -28,7 +28,7 @@ RisingWave converts [well-known types](https://protobuf.dev/reference/protobuf/g | enum | varchar | | message | struct. See details in [Nested messages](#nested-messages). | | repeated | array | -| map | Not supported | +| map | map. See details in [Map](/sql/data-types/map-type). | | google.protobuf.Struct | Not supported | | google.protobuf.Timestamp | `struct` | | google.protobuf.Duration | `struct` | @@ -51,4 +51,4 @@ Will be converted to `struct` in RisingWave. ## Related topics -* [CREATE SOURCE](/docs/next/sql-create-source/) +* [CREATE SOURCE](/sql/commands/sql-create-source/) diff --git a/sql/functions/aggregate.mdx b/sql/functions/aggregate.mdx index 85982bce..8570fc90 100644 --- a/sql/functions/aggregate.mdx +++ b/sql/functions/aggregate.mdx @@ -179,11 +179,9 @@ var_samp ( expression ) -> output_value ``` ## Ordered-set aggregate functions - -**NOTE** - + At present, ordered-set aggregate functions support only constant fraction arguments. - + ### `mode` @@ -203,11 +201,9 @@ SELECT mode() WITHIN GROUP (ORDER BY column1) FROM table1; ### `percentile_cont` - -**NOTE** - + At present, `percentile_cont` is not supported for [streaming queries](/docs/current/key-concepts/#streaming-queries) yet. - + Computes the continuous percentile, which is a value corresponding to the specified fraction within the ordered set of aggregated argument values. It can interpolate between adjacent input items if needed. @@ -228,11 +224,9 @@ If NULL is provided, the function will not calculate a specific percentile and r ### `percentile_disc` - -**NOTE** - + At present, `percentile_disc` is not supported for streaming queries yet. 
- + Computes the discrete percentile, which is the first value within the ordered set of aggregated argument values whose position in the ordering equals or exceeds the specified fraction. diff --git a/sql/functions/comparison.mdx b/sql/functions/comparison.mdx index 7b295289..2f4c7ada 100644 --- a/sql/functions/comparison.mdx +++ b/sql/functions/comparison.mdx @@ -5,7 +5,7 @@ title: "Comparison functions and operators" ## Comparison operators | Operator | Expression & Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | \= | `operand1 = operand2`
Equal.
TRUE if the operands separated by = have the same value. | 1 = 1 → t
'1' = 1 → t
'a' = 'b' → f
(1, 0) = (1, 1) → f
('a', 'b') = ('a', 'b') → t | | \<>
!= | `operand1 \<> operand2` or `operand1 != operand2`
Not equal.
TRUE if the operands separated by \<> or != have different values. | 1 \<> 1 → f
'1' != 1 → f
'a' != 'b' → t
(1, 0) \<> (1, 1) → t
('a', 'b') != ('a', 'b') → f | | \< | `operand1 < operand2`
Less than.
TRUE if _operand1_ is less than _operand2_. | 0 < 1 → t
1 < 1 → f | @@ -16,7 +16,7 @@ title: "Comparison functions and operators" ## Comparison predicate | Operator | Expression & Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | IS DISTINCT FROM | `operand1 IS DISTINCT FROM operand2`
Not equal (null comparable).
TRUE if _operand1_ is not equal to _operand2_. | 1 IS DISTINCT FROM NULL → t
1 IS DISTINCT FROM 1 → f | | IS NOT DISTINCT FROM | `operand1 IS NOT DISTINCT FROM operand2`
Equal (null comparable).
TRUE if _operand1_ is equal to _operand2_. | 1 IS NOT DISTINCT FROM NULL → f | | BETWEEN ... AND ... | `operand BETWEEN min AND max`
Between (inclusive range).
TRUE if the operand is greater than or equal to _min_ and less than or equal to _max_. | 1 BETWEEN 0 AND 1 → t
'c' BETWEEN 'a' AND 'b' → f | diff --git a/sql/functions/conditional.mdx b/sql/functions/conditional.mdx index 51df5dda..d62a1223 100644 --- a/sql/functions/conditional.mdx +++ b/sql/functions/conditional.mdx @@ -19,7 +19,7 @@ END #### Parameters | Parameter | Description | -| ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------- | | _condition_ | An expression that evaluates to a BOOLEAN value. | | _result_ | A value or an expression that evaluates to a value. The **CASE** expression returns _result_ if its associated _condition_ evaluates to true. | @@ -43,7 +43,7 @@ END #### Parameters | Parameter | Description | -| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :----------- | :------------------------------------------------------------------------------------------------------------------------------------------------------- | | _expression_ | An expression that evaluates to a value. _expression_ is computed in the first place and its value will be compared with _value_ in the **WHEN** clause. | | _value_ | A value or an expression that evaluates to a value. Each value is a potential match for the _expression_. | | _result_ | A value or an expression that evaluates to a value. The **CASE** expression returns _result_ if its associated _value_ matches the _expression_. | diff --git a/sql/functions/cryptographic.mdx b/sql/functions/cryptographic.mdx index 422e2614..67363299 100644 --- a/sql/functions/cryptographic.mdx +++ b/sql/functions/cryptographic.mdx @@ -32,11 +32,9 @@ algorithm [-mode][/pad:padding] * pkcs — data may be any length (default) * none — data must be multiple of cipher block size - -**NOTE** - + The given encryption/decryption key MUST match length 16/24/32 bytes as required by aes-128/192/256. - + ```bash Examples of type text aes-cbc/pad:pkcs => AES algorithm, cbc mode, enabling padding diff --git a/sql/functions/datetime.mdx b/sql/functions/datetime.mdx index 8ed50ad8..4f7f021e 100644 --- a/sql/functions/datetime.mdx +++ b/sql/functions/datetime.mdx @@ -4,7 +4,7 @@ title: "Date and time functions and operators" ## Timespan operators | Operation | Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | interval \* double precision → interval | Multiplies an interval by a double. | `real '6.1' * interval '1' second` → `00:00:06.1`
`interval '1' second * real '6.1'` → `00:00:06.1` | | interval / double precision → interval | Divides an interval by a double. Error is thrown for division by zero. | `interval '12 days' / 4.2` → `2 days 20:34:17.143`
`interval '14000' / int '14'` → `00:16:40` | | interval + interval → interval | Adds an interval to an interval. | `interval '20' hour + interval '10' hour` → `30:00:00` | @@ -16,7 +16,7 @@ title: "Date and time functions and operators" ## Offsetting operators | Operation | Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | timestamp + interval → timestamp | Adds an interval to a timestamp. | `'2022-03-13 01:00:00'::timestamp + interval '24' hour` → `2022-03-14 01:00:00` | | timestamp - interval → timestamp | Subtracts an interval from a timestamp. | `'2022-03-14 01:00:00'::timestamp - interval '24' hour` → `2022-03-13 01:00:00` | | timestamp - timestamp → interval | Subtracts a timestamp from a timestamp. | `'2022-03-13 03:00:00'::timestamp - '2022-03-13 01:00:00'` → `02:00:00` | @@ -30,17 +30,15 @@ title: "Date and time functions and operators" ## Timestamp with time zone operators | Operation | Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | timestamp AT TIME ZONE _time\_zone_ → timestamptz

timestamptz AT TIME ZONE _time\_zone_ → timestamp | Converts times from timestamp to timestamptz (i.e., timestamp with time zone) or timestamptz to timestamp. Invalid local time during daylight saving forward is not supported. Ambiguous local time during daylight saving backward is interpreted as after the transition. | `'2021-12-31 16:00:00'::timestamp AT TIME ZONE 'us/pacific'` → `2022-01-01 00:00:00+00:00`

`'2022-01-01 00:00:00Z'::timestamptz AT TIME ZONE 'us/pacific'` → `2021-12-31 16:00:00` | | timestamptz + interval → timestamptz | Adds a fixed interval to a timestamp with time zone. See note below. | `'2022-03-13 01:00:00Z'::timestamp with time zone + interval '24' hour` → `2022-03-14 01:00:00+00:00` | | timestamptz - interval → timestamptz | Subtracts a fixed interval from a timestamp with time zone. See note below. | `'2022-03-14 01:00:00Z'::timestamp with time zone - interval '24' hour` → `2022-03-13 01:00:00+00:00` | | timestamptz - timestamptz → interval | Subtracts a timestamp with time zone from a timestamp with time zone and converts 24-hour intervals into days. | `'2023-07-30 13:22:00-05:00'::timestamptz - '2023-07-29 13:22:00-04:00'::timestamptz` → `1 day 01:00:00` | - -**NOTE** - + An interval can contain hour/minute/second (i.e., fixed length) but not year/month/day (i.e., variable length). - + ## Date and time functions @@ -281,7 +279,7 @@ For date and time formatting functions like `to_char`, `to_timestamp`, and `to_d Please see the table below for the template patterns supported in RisingWave. | Pattern | Description | -| ------------------ | ---------------------------------------------------------- | +| :----------------- | :--------------------------------------------------------- | | HH24 or hh24 | hour of day (00–23) | | HH12 or hh12 | hour of day (01–12) | | HH or hh | hour of day (01–12) | diff --git a/sql/functions/json.mdx b/sql/functions/json.mdx index 4e77fddb..fd8dcf3c 100644 --- a/sql/functions/json.mdx +++ b/sql/functions/json.mdx @@ -356,12 +356,10 @@ SELECT * FROM jsonb_populate_record( ``` - -**NOTE** - + The `jsonb_populate_record` function in RisingWave differs from the function in PostgreSQL. In PostgreSQL, users are required to define a **composite type** using the `CREATE TYPE` statement before using these functions. However, in RisingWave, you should use the **inline struct type** instead. - + ### `jsonb_populate_recordset`[](#jsonb%5Fpopulate%5Frecordset "Direct link to jsonb_populate_recordset") Expands the top-level JSON array of objects to a set of rows having the **struct type** of the base argument. Each element of the JSON array is processed as described above for [jsonb\_populate\_record](#jsonb%5Fpopulate%5Frecord). @@ -380,11 +378,9 @@ select * from jsonb_populate_recordset( 3 4 ``` - -**NOTE** - + The `jsonb_populate_recordset` function in RisingWave differs from the function in PostgreSQL. In PostgreSQL, users are required to define a **composite type** using the `CREATE TYPE` statement before using these functions. However, in RisingWave, you should use the **inline struct type** instead. - + ### `jsonb_populate_map`[](#jsonb%5Fpopulate%5Fmap "Direct link to jsonb_populate_map") diff --git a/sql/functions/logical.mdx b/sql/functions/logical.mdx index 3fc21123..5cfd5d0d 100644 --- a/sql/functions/logical.mdx +++ b/sql/functions/logical.mdx @@ -4,7 +4,7 @@ mode: wide --- | Operator | Expression & Description | -| -------- | ------------------------------------------------------------------------------------- | +| :------- | :------------------------------------------------------------------------------------ | | AND | boolean1 AND boolean2 Logical AND. TRUE if both _boolean1_ and _boolean2_ are TRUE. | | OR | boolean1 OR boolean2 Logical OR. TRUE if either _boolean1_ or _boolean2_ is TRUE. | | NOT | NOT boolean Negates value. 
| @@ -12,7 +12,7 @@ mode: wide **Example** | a | b | a AND b | a OR b | NOT a | -| ----- | ----- | ------- | ------ | ----- | +| :---- | :---- | :------ | :----- | :---- | | TRUE | TRUE | TRUE | TRUE | FALSE | | TRUE | FALSE | FALSE | TRUE | FALSE | | TRUE | NULL | NULL | TRUE | FALSE | diff --git a/sql/functions/mathematical.mdx b/sql/functions/mathematical.mdx index eb3c5074..0df48b7a 100644 --- a/sql/functions/mathematical.mdx +++ b/sql/functions/mathematical.mdx @@ -5,7 +5,7 @@ title: "Mathematical functions and operators" ## Mathematical operators | Operator | Expression & Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | `+` | `operand1 + operand2`
Addition. | `1 + 2 → 3` | | `-` | `operand1 - operand2`
Subtraction. | `1 - 2 → -1` | | `-` | `- operand`
Negation. | `- (-1) → 1` | @@ -24,7 +24,7 @@ title: "Mathematical functions and operators" ## Mathematical functions | Function | Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | abs ( _input\_value_ ) → _absolute\_value_
@ ( _input\_value_ ) → _absolute\_value_ | Returns the absolute value of _input\_value_. The _input\_value_ can be of type int or decimal. The return type is the same as the _input\_value_ type. | abs(-3) → 3
@(-3) → 3 | | cbrt ( _double\_precision\_input_ ) → _double\_precision\_output_ | Returns the cube root of the input. | cbrt(27) → 3 | | ceil ( _numeric\_input_ ) → _integer\_output_
ceil ( _double\_precision\_input_ ) → _integer\_output_ | Returns the nearest integer greater than or equal to the argument. ceiling() can also be used as an alias for ceil(). | ceil(1.23559) → 2
ceiling(-1.23559) → -1 | @@ -45,7 +45,7 @@ title: "Mathematical functions and operators" ## Trigonometric functions | Function | Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | sin ( _radians_ ) → _sine_ | Returns the trigonometric sine (in double precision) of an angle measured in radians (in double precision). | sin(1) → 0.8414709848078965 | | cos ( _radians_ ) → _cosine_ | Returns the trigonometric cosine (in double precision) of an angle measured in radians (in double precision). | cos(1) → 0.5403023058681398 | | tan ( _radians_ ) → _tangent_ | Returns the trigonometric tangent (in double precision) of an angle measured in radians (in double precision). | tan(1) → 1.5574077246549021 | @@ -70,6 +70,6 @@ title: "Mathematical functions and operators" ## Degrees and radians functions | Function | Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | degrees ( _radians_ ) → _degrees_ | Returns the conversion (in double precision) of an angle measured in radians (in double precision) to degrees. | degrees(pi()/2) → 90 | | radians ( _degrees_ ) → _radians_ | Returns the conversion (in double precision) of an angle measured in degrees (in double precision) to radians. | radians(180) → 3.141592653589793 | diff --git a/sql/functions/string.mdx b/sql/functions/string.mdx index 592ae261..98515059 100644 --- a/sql/functions/string.mdx +++ b/sql/functions/string.mdx @@ -5,7 +5,7 @@ title: "String functions and operators" ## String operators | Operator | Expression & Description | Example | -| --- | --- | --- | +| :-- | :-- | :-- | | \| | `expression1 \| expression2 [ \| expression ] ...`
Concatenates two or more expressions. | `'Abcde' \|\| 1 \|\| 23` → `Abcde123` | | `^@` | `string ^@ substring`

Returns true (`t`) if _string_ starts with _substring_. This operator is equivalent to the `starts_with`() function. | `'abcdef' ^@ 'abc'` → `t` | ## String functions @@ -111,11 +111,9 @@ convert_from(string bytea, src_encoding name) → text convert_from('\x4346464558'::bytea, 'utf8') → 'CFFEX' ``` - -**NOTE** - + For this function, only encoding UTF8 is supported. RisingWave uses UTF8 encoding to store text, so this function primarily serves as a type conversion operation. - + ### `convert_to` @@ -130,11 +128,9 @@ convert_to(string text, dest_encoding name) → bytea convert_to('Hello World', 'UTF8') → '\\x48656c6c6f20576f726c64' ``` - -*NOTE* - + For this function, only encoding UTF8 is supported. RisingWave uses UTF8 encoding to store text, so this function primarily serves as a type conversion operation. - + ### `decode` @@ -669,11 +665,9 @@ If the pattern does not contain `_` or `%`, then the pattern only represents the To match a literal underscore or percent sign without matching other characters, the respective character in pattern must be preceded by the escape character `\`. To match the escape character itself, write two escape characters: `\\`. - -**NOTE** - + You can use `ESCAPE ''` to disable the escape mechanism, but specifying a custom escape character using the `ESCAPE` clause is not supported. - + ### Examples @@ -696,7 +690,7 @@ The `SIMILAR TO` expression returns true if the string matches the supplied patt ### Metacharacter | Operator | Description | -| -------- | --------------------------------------------------------------- | +| :------- | :-------------------------------------------------------------- | | % | Matches any sequence of zero or more characters. | | \_ | Matches any single character. | | \| | Denotes alternation (either of two alternatives). | diff --git a/sql/functions/sys-admin.mdx b/sql/functions/sys-admin.mdx index 163e1d30..e81afa37 100644 --- a/sql/functions/sys-admin.mdx +++ b/sql/functions/sys-admin.mdx @@ -149,9 +149,7 @@ SELECT pg_stat_get_numscans('my_table'); (1 row) ``` - -**NOTE** - + This is a dummy function intended for compatibility with third-party tools. We keep it here only for reference and it will be eventually removed. Please do not use it in production environments or any important tasks. - + diff --git a/sql/functions/window-functions.mdx b/sql/functions/window-functions.mdx index 3bc0d4f7..5b3b386e 100644 --- a/sql/functions/window-functions.mdx +++ b/sql/functions/window-functions.mdx @@ -19,12 +19,10 @@ The syntax of `row_number()` is: row_number() → integer ``` - -**NOTE** - + We recommend using `row_number()` only for top-N pattern queries. For details about this pattern, see [Top-N by group](/docs/current/sql-pattern-topn/). - + ### `rank()` `rank()` returns the rank of the current row, with gaps; that is, the `row_number` of the first row in its peer group. diff --git a/sql/psql-commands.mdx b/sql/psql-commands.mdx index ba87877a..a4a63874 100644 --- a/sql/psql-commands.mdx +++ b/sql/psql-commands.mdx @@ -5,7 +5,7 @@ mode: wide --- | Command | Description | -| ------- | --------------------------------------------------------------------------- | +| :------ | :-------------------------------------------------------------------------- | | \\d | Lists all relations in the current database. Sources are not yet supported. | | \\di | Lists all indexes in the current database. | | \\dm | Lists all materialized views in the current database. 
| diff --git a/sql/query-syntax/generated-columns.mdx b/sql/query-syntax/generated-columns.mdx index 5c3af59a..11e38c55 100644 --- a/sql/query-syntax/generated-columns.mdx +++ b/sql/query-syntax/generated-columns.mdx @@ -10,12 +10,10 @@ To create a generated column, use the `AS ` clause in [CR CREATE TABLE t1 (v1 int AS v2-1, v2 int, v3 int AS v2+1); ``` - -**NOTE** - + * A generation expression cannot reference another generated column. * The generated column is created in RisingWave and will not be accessed through the external connector. Therefore, if the external upstream system has a schema, it does not need to include the generated column within the table's schema in the external system. - + A generated column in a table is slightly different from one in a source. diff --git a/sql/query-syntax/group-by-clause.mdx b/sql/query-syntax/group-by-clause.mdx index 09668554..ae877dd4 100644 --- a/sql/query-syntax/group-by-clause.mdx +++ b/sql/query-syntax/group-by-clause.mdx @@ -70,7 +70,7 @@ GROUP BY ROLLUP (product_category, product_subcategory, region); The results are like below: | product\_category | product\_subcategory | region | total\_sales | -| ----------------- | -------------------- | ------ | ------------ | +| :---------------- | :------------------- | :----- | :----------- | | Electronics | Smartphones | North | 1000 | | Electronics | Smartphones | South | 1500 | | Electronics | Smartphones | NULL | 2500 | diff --git a/sql/query-syntax/literals.mdx b/sql/query-syntax/literals.mdx index b4b62706..88465c06 100644 --- a/sql/query-syntax/literals.mdx +++ b/sql/query-syntax/literals.mdx @@ -18,7 +18,7 @@ String literals with C-style escapes use escape sequences to represent special c The following escape sequences are supported: | Escape sequence | Interpretation | -| -------------------------- | ------------------------------------------ | +| :------------------------- | :----------------------------------------- | | \\b | backspace | | \\f | form feed | | \\n | newline | diff --git a/sql/query-syntax/set-operations.mdx b/sql/query-syntax/set-operations.mdx index cbed5d40..c7d052cd 100644 --- a/sql/query-syntax/set-operations.mdx +++ b/sql/query-syntax/set-operations.mdx @@ -32,13 +32,13 @@ _WHERE conditions_ are optional. These conditions must be met for the records to Suppose that we have a table,`points_scored_current_week`, that consists of these columns: `id`, `first_half`, and `second_half`. | id | first\_half | second\_half | -| -- | ----------- | ------------ | +| :- | :---------- | :----------- | | 1 | 10 | 20 | Next, suppose that we have a second table, `points_scored_last_week`, that consists of these columns: `id`, `first_half`, and `second_half`. | id | first\_half | second\_half | -| -- | ----------- | ------------ | +| :- | :---------- | :----------- | | 1 | 10 | 20 | Here is an example that uses the UNION operator: @@ -78,11 +78,9 @@ The result looks like this: | 2 | 10 | 20 | ``` - -**NOTE** - + UNION and UNION ALL operators are both supported for streaming queries. - + ## `INTERSECT` @@ -111,13 +109,13 @@ _WHERE conditions_ are optional. These conditions must be met for the records to Suppose that we have a table,`points_scored_current_week`, that consists of these columns: `id`, `first_half`, and `second_half`. 
| id | first\_half | second\_half | -| -- | ----------- | ------------ | +| :- | :---------- | :----------- | | 1 | 10 | 20 | Next, suppose that we have a second table, `points_scored_last_week`, that consists of these columns: `id`, `first_half`, and `second_half`. | id | first\_half | second\_half | -| -- | ----------- | ------------ | +| :- | :---------- | :----------- | | 1 | 10 | 20 | Here is an example that uses the `INTERSECT` operator: @@ -140,11 +138,9 @@ The result looks like this: In this case, the `INTERSECT` operator returned the rows that are common to both the `points_scored_current_week` and `points_scored_last_week` tables. If there were no common rows, the `INTERSECT` operator would return an empty set. - -**NOTE** - + `INTERSECT` operator is supported for streaming queries. - + ## `CORRESPONDING` in set operations diff --git a/sql/query-syntax/value-exp.mdx b/sql/query-syntax/value-exp.mdx index 703f7dd8..baa4079d 100644 --- a/sql/query-syntax/value-exp.mdx +++ b/sql/query-syntax/value-exp.mdx @@ -22,7 +22,7 @@ aggregate_name ( [ expression [ , ... ] ] ) WITHIN GROUP ( order_by_clause ) [ F The `DISTINCT` keyword, which is only available in the second form, cannot be used together with an `ORDER BY` or `WITHIN GROUP` clause. Additionally, it's important to note that the `order_by_clause` is positioned differently in the first and fourth forms. -In batch mode, `aggregate_name` can also be in the following form: +`aggregate_name` can also be in the following form: ```sql AGGREGATE:function_name @@ -49,11 +49,9 @@ window_function_name ( [expression [, expression ... ]] ) OVER [frame_clause]) ``` - -**NOTE** - + Currently, the `PARTITION BY` clause is required. If you do not want to partition the rows into smaller sets, you can work around by specifying `PARTITION BY 1::int`. - + For ranking window functions like `row_number`, `rank` and `dense_rank`, `ORDER BY` clause is required. @@ -85,11 +83,9 @@ The requirements of `offset` vary in different frames. In `ROWS` frame, the `off For `SESSION` frame, the requirements of `gap` are very similar to those of `offset` for `RANGE` frame. The `ORDER BY` clause should specify exactly one column and the `gap` expression should be a positive constant of a data type that is determined by the data type of the ordering column. - -**NOTE** - + Currently, `SESSION` frame is only supported in batch mode and Emit-On-Window-Close streaming mode. - + `frame_exclusion` can be either of these: @@ -98,11 +94,9 @@ EXCLUDE CURRENT ROW EXCLUDE NO OTHERS ``` - -**NOTE** - + In RisingWave, `frame_clause` is optional. Depending on whether the `ORDER BY` clause is present, the default value is different. When the `ORDER BY` clause is present, the default value is `ROWS UNBOUNDED PRECEDING AND CURRENT ROW`. When the `ORDER BY` clause is not present, the default value is `ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`. This is different from the behavior in PostgreSQL. We may align the default frame with PostgreSQL in the future. - + ## Type casts @@ -114,9 +108,9 @@ expression::type ``` | Parameter | Description | -| ------------ | --------------------------------------------------------------------------------------------------------------------------------- | +| :----------- | :-------------------------------------------------------------------------------------------------------------------------------- | | _expression_ | The expression of which the data type to be converted. 
| -| _type_ | The data type of the returned value.For the types you can cast the value to, see \[Casting\](/sql/data-types/data-type-casting.md | +| _type_ | The data type of the returned value. For the types you can cast the value to, see [Casting](/sql/data-types/casting). | ## Row constructors diff --git a/sql/system-catalogs/information-schema.mdx b/sql/system-catalogs/information-schema.mdx index 4591903b..5e6f98db 100644 --- a/sql/system-catalogs/information-schema.mdx +++ b/sql/system-catalogs/information-schema.mdx @@ -7,21 +7,61 @@ description: "The information schema consists of a set of views that contain inf The `information_schema.tables` view contains all tables, views, sinks, and materialized views defined in the current database. - -**NOTE** - + Materialized views are specific to the information schema of RisingWave. They are not included in the information schema of PostgreSQL. - + The `information_schema.tables` view contains the following columns. | Column | Type | Description | -| -------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| table\_catalog | varchar | Name of the current database | +| :------------------- | :------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| table\_catalog | varchar | Name of the current database. | | table\_schema | varchar | Name of the schema that contains the table, view, or materialized view. The default schema for user-created objects is public. | -| table\_name | varchar | Name of the table, view, or materialized view | +| table\_name | varchar | Name of the table, view, or materialized view. | | table\_type | varchar | Type of the table, view, or materialized view. BASE TABLE for a user-defined table, VIEW for a non-materialized view, MATERIALIZED VIEW for a materialized view, and SYSTEM TABLE for a system table. | -| is\_insertable\_into | varchar | YES if the table or view is insertable into, NO if not. User-defined tables are always insertable, while views and materialized views are not necessarily. | +| is\_insertable\_into | varchar | YES if the table or view is insertable into, NO if not. User-defined tables are always insertable, while views and materialized views are not necessarily. + +## Table constraints + +The `table_constraints` view contains all constraints for tables that the current user owns or has privileges other than `SELECT` on. + +The `table_constraints` view contains the following columns. + +|Column|Type|Description| +|---|---|---| +| `constraint_catalog` | varchar | Name of the database that contains the constraint. | +| `constraint_schema` | varchar | Name of the schema that contains the constraint. | +| `constraint_name` | varchar | Name of the constraint. | +| `table_catalog` | varchar | Name of the database that contains the table. | +| `table_schema` | varchar | Name of the schema that contains the table. | +| `table_name` | varchar | Name of the table. | +| `constraint_type` | varchar | Type of the constraint: `PRIMARY KEY`(p), `UNIQUE`(u), `CHECK`(c), or `EXCLUDE`(x). | +| `is_deferrable` | varchar | `YES` if the constraint is deferrable, `NO` if not. | +| `initially_deferred` | varchar | `YES` if the constraint is deferrable and initially deferred, `NO` if not. 
| +| `enforced` | varchar | `YES` if the constraint is validated and enforced, `NO` if not. | + + +**TEMPORARY LIMITATION** + +This view assumes the constraint schema is the same as the table schema, since `pg_catalog.pg_constraint` only supports primary key. + + + +## Schemata + +The `schemata` view contains all accessible schemas in the current database for users, either by way of being the owner or having some privilege. + +It contains the following columns. + +|Column|Type|Description| +|---|---|---| +|`catalog_name`| varchar | Name of the database containing the schema. | +|`schema_name`| varchar | Name of the schema. | +|`schema_owner` | varchar | Name of the schema owner.| +|`default_character_set_catalog` | varchar | Name of the database that contains the schema's default character set.| +|`default_character_set_schema`|varchar | Name of the schema containing the default character set. | +|`default_character_set_name`|varchar |Name of the schema's default character set.| +|`sql_path`|varchar | SQL path specification for the schema.| ## Views @@ -30,24 +70,24 @@ The `information_schema.views` view contains information about the views in the It contains the following columns. | Column | Type | Description | -| ---------------- | ------- | ----------------------------------------- | -| table\_catalog | varchar | Name of the current database | -| table\_schema | varchar | Name of the schema that contains the view | -| table\_name | varchar | Name of the view | -| view\_definition | varchar | SQL statement that defines the view | +| :--------------- | :------ | :---------------------------------------- | +| table\_catalog | varchar | Name of the current database. | +| table\_schema | varchar | Name of the schema that contains the view. | +| table\_name | varchar | Name of the view. | +| view\_definition | varchar | SQL statement that defines the view. | - + **TEMPORARY LIMITATION** Users with access to `information_schema.views` can potentially access all views, which poses a security risk. We are working to resolve this limitation. Once the fix is implemented, this message will be removed. - + ## Columns The `information_schema.columns` view contains information about columns of all tables, views, and materialized views in the database. | Column | Type | Description | -| ---------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------ | +| :--------------------- | :------ | :----------------------------------------------------------------------------------------------------------------------------------- | | table\_catalog | varchar | Name of the current database. | | table\_schema | varchar | Name of the schema that contains the table, sink, view, or materialized view. The default schema for user-created objects is public. | | table\_name | varchar | Name of the table, sink, view, or materialized view | diff --git a/sql/system-catalogs/pg-catalog.mdx b/sql/system-catalogs/pg-catalog.mdx index d0225ae5..806c6abb 100644 --- a/sql/system-catalogs/pg-catalog.mdx +++ b/sql/system-catalogs/pg-catalog.mdx @@ -5,13 +5,11 @@ description: "RisingWave supports these system catalogs and views of PostgreSQL. For information about RisingWave and PostgreSQL system functions, see [System administration functions](/docs/current/sql-function-sys-admin/) and [System information functions](/docs/current/sql-function-sys-info/). 
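As a quick way to explore these catalogs (a minimal sketch, not part of the documented examples: it assumes the default local connection settings `-h localhost -p 4566 -d dev -U root` and that `pg_class` exposes the standard `relname`/`relkind` columns, which may not hold in every deployment):

```bash
# Illustrative only: list a few entries from pg_catalog.pg_class via psql.
# Connection parameters reflect a typical local RisingWave setup; adjust them
# for your own deployment.
psql -h localhost -p 4566 -d dev -U root \
  -c "SELECT relname, relkind FROM pg_catalog.pg_class LIMIT 5;"
```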
- -**NOTE** - + RisingWave does not fully support all PostgreSQL system catalog columns. - + | Catalog/View Name | Description | -| --- | --- | +| :-- | :-- | | [`pg_am`](https://www.postgresql.org/docs/current/catalog-pg-am.html) | Contains information about relation access methods. | | [`pg_attrdef`](https://www.postgresql.org/docs/current/catalog-pg-attrdef.html) | Contains default values for table columns. | | [`pg_attribute`](https://www.postgresql.org/docs/current/catalog-pg-attribute.html) | Contains information about table columns. | @@ -34,6 +32,7 @@ RisingWave does not fully support all PostgreSQL system catalog columns. | [`pg_proc`](https://www.postgresql.org/docs/current/catalog-pg-proc.html) | Contains information about functions, aggregate functions, and window functions. | | [`pg_range`](https://www.postgresql.org/docs/current/catalog-pg-range.html) | Contains information about range types in the database. | | [`pg_roles`](https://www.postgresql.org/docs/current/view-pg-roles.html) | Contains information about database roles. | +| [`pg_sequence`](https://www.postgresql.org/docs/current/catalog-pg-sequence.html)| Contains information about sequences. | | [`pg_sequences`](https://www.postgresql.org/docs/current/view-pg-sequences.html) | Contains information about each sequence in the database. | | [`pg_settings`](https://www.postgresql.org/docs/current/view-pg-settings.html) | Contains information about run-time parameters of the server. | | [`pg_shadow`](https://www.postgresql.org/docs/current/view-pg-shadow.html) | Contains information about database users. Specifically, it contains information about the login roles that have been created in the database, including their usernames, password hashes, and other authentication-related information. | diff --git a/sql/system-catalogs/rw-catalog.mdx b/sql/system-catalogs/rw-catalog.mdx index 46d230b8..574e40b3 100644 --- a/sql/system-catalogs/rw-catalog.mdx +++ b/sql/system-catalogs/rw-catalog.mdx @@ -75,14 +75,14 @@ SELECT name, initialized_at, created_at FROM rw_sources; ## Available RisingWave catalogs | Relation Name | Description | -| --------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :-------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | rw\_actors | Contains the available actor IDs, their statuses, and the corresponding fragment IDs, and parallel unit IDs. | | rw\_actor\_id\_to\_ddl | Contains information about the participants who executed the database schema change operations (DDL) and their corresponding actor\_id identifiers. The outputs include actor IDs, fragment IDs, job IDs, schema IDs, DDL types, and names of the affected object. | | rw\_columns | Contains information about columns of all relations (except sources) in the database, including their names, positions, data types, generation details, and more. 
| | rw\_connections | Contains details about the connections available in the database, such as their IDs, names, owners, types, and more. | | rw\_databases | Contains information about the databases available in the database, such as the IDs, names, and owners. | | rw\_depend | Contains the dependency relationships between tables, indexes, views, materialized views, sources, and sinks. | -| rw\_ddl\_progress | Contains the progress of running DDL statements. You can use this relation to view the progress of running DDL statements. For details, see [View statement progress](/docs/current/view-statement-progress/). | +| rw\_ddl\_progress | Contains the progress of running DDL statements. You can use this relation to view the progress of running DDL statements. For details, see [Monitor statement progress](/docs/current/monitor-statement-progress/). | | rw\_description | Contains optional descriptions (comments) for each database object. Descriptions can be added with the [COMMENT ON](/docs/current/sql-comment-on/) command and viewed with DESCRIBE or SHOW COLUMNS FROM command. | | rw\_event\_logs | Contains information about events, including event IDs, timestamps, event types, and additional information if available. | | rw\_fragment\_id\_to\_ddl | Contains information about the database schema change operations (DDL) and their corresponding fragment\_id identifiers. The outputs include fragment IDs, job IDs, schema IDs, DDL types, and names of the affected object. | diff --git a/sql/udfs/embedded-python-udfs.mdx b/sql/udfs/embedded-python-udfs.mdx index 3f5cdf43..a6c264f1 100644 --- a/sql/udfs/embedded-python-udfs.mdx +++ b/sql/udfs/embedded-python-udfs.mdx @@ -31,12 +31,10 @@ The Python code must contain a function that has the same name as declared in th See the correspondence between SQL types and Python types in the [Data type mapping](/docs/current/udf-python-embedded/#data-type-mapping). - -**NOTE** - + Due to the nature of Python, the correctness of the source code cannot be verified when creating a function. It is recommended to make sure your implementation is correct through batch query before using UDFs in materialized views. If an error occurs when executing UDF in materialized views, all output results will be NULL. 
- + ```bash Call function SELECT gcd(15, 25); @@ -156,7 +154,7 @@ Currently, embedded Python UDFs are only allowed to use the following standard l The following table shows the data type mapping between SQL and Python: | SQL Type | Python Type | Notes | -| ---------------- | ------------------------------ | ------------------ | +| :--------------- | :----------------------------- | :----------------- | | BOOLEAN | bool | | | SMALLINT | int | | | INT | int | | diff --git a/sql/udfs/sql-udfs.mdx b/sql/udfs/sql-udfs.mdx index 60429f3b..e86d5cef 100644 --- a/sql/udfs/sql-udfs.mdx +++ b/sql/udfs/sql-udfs.mdx @@ -17,7 +17,7 @@ For more details about the supported syntax, see the [examples of SQL UDFs](#exa ## Parameters | Parameter or clause | Description | -| ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| :------------------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _function\_name_ | The name of the SQL UDF that you want to declare in RisingWave. | | _argument\_type_ | The data type of the input parameter(s) that the SQL UDF expects to receive. | | **RETURNS** _return\_type_ | Specifies the data type of the return value from the UDF. | @@ -25,11 +25,9 @@ For more details about the supported syntax, see the [examples of SQL UDFs](#exa | **AS** _as\_definition_ | Defines the implementation of the function using SQL statements. as\_definition can be single quote definition (e.g., 'select $1 + $2') or double dollar definition (e.g., $$select $1 + $1$$). | | **RETURN** _return\_definition_ | Alternative to the AS clause. return\_definition can be an expression (e.g., $1 + $2). Note that **you must specify an AS definition or a RETURN definition, and they can not be specified simultaneously.** | - -**NOTE** - + * Recursive definition is NOT supported at present. For example, the statement `create function recursive(INT, INT) returns int language sql as 'select recursive($1, $2) + recursive($1, $2)';` will fail. - + ## Examples @@ -344,11 +342,9 @@ select regexp_replace_wrapper('Cat is the cutest animal.'); Dog is the cutest animal. ``` - -**NOTE** - + Note that double dollar signs should be used otherwise the parsing will fail. - + --- diff --git a/sql/udfs/use-udfs-in-java.mdx b/sql/udfs/use-udfs-in-java.mdx index 11744511..320ae1ac 100644 --- a/sql/udfs/use-udfs-in-java.mdx +++ b/sql/udfs/use-udfs-in-java.mdx @@ -96,14 +96,14 @@ public class Gcd implements ScalarFunction { } ``` - + **DIFFERENCES WITH FLINK** * The `ScalarFunction` is an interface instead of an abstract class. * Multiple overloaded `eval` methods are not supported. * Variable arguments such as `eval(Integer...)` are not supported. - + ### Table functions A user-defined table function maps zero, one, or multiple scalar values to one or multiple rows (structured types). @@ -126,7 +126,7 @@ public class Series implements TableFunction { } ``` - + **DIFFERENCES WITH FLINK** * The `TableFunction` is an interface instead of an abstract class. It has no generic arguments. @@ -134,7 +134,7 @@ public class Series implements TableFunction { * Multiple overloaded `eval` methods are not supported. * Variable arguments such as `eval(Integer...)` are not supported. 
* In SQL, table functions can be used in the `FROM` clause directly. `JOIN LATERAL TABLE` is not supported. - + ## 3\. Start a UDF server @@ -197,7 +197,7 @@ SELECT * FROM series(10); The RisingWave Java UDF SDK supports the following data types: | SQL Type | Java Type | Notes | -| ---------------- | --------------------------------------- | ------------------------------------------------------------------------------- | +| :--------------- | :-------------------------------------- | :------------------------------------------------------------------------------ | | BOOLEAN | boolean, Boolean | | | SMALLINT | short, Short | | | INT | int, Integer | | diff --git a/sql/udfs/use-udfs-in-javascript.mdx b/sql/udfs/use-udfs-in-javascript.mdx index fbaa3615..5ce82e54 100644 --- a/sql/udfs/use-udfs-in-javascript.mdx +++ b/sql/udfs/use-udfs-in-javascript.mdx @@ -114,7 +114,7 @@ $$; The following table shows the data type mapping between SQL and JavaScript: | SQL Type | JavaScript Type | Note | -| -------------------- | ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------ | +| :------------------- | :--------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------- | | boolean | boolean | | | smallint | number | | | int | number | | diff --git a/sql/udfs/use-udfs-in-python.mdx b/sql/udfs/use-udfs-in-python.mdx index fd4ed11b..0621f010 100644 --- a/sql/udfs/use-udfs-in-python.mdx +++ b/sql/udfs/use-udfs-in-python.mdx @@ -22,11 +22,9 @@ Cannot run this command?If "command not found: pip" is returned, [check if pip i - -**NOTE** - + The current Python UDF SDK is supported since version 1.10 and is not supported in older versions. If you are using an older version of RisingWave, please refer to the historical version of the documentation. If you have used an older version of the RisingWave UDF SDK (risingwave 0.1), we strongly encourage you to update to the latest version. You can refer to the [migration guide](#migration-guide-from-risingwave-01-to-arrow-udf-02) for upgrading. Older versions are still supported but will not receive new features or bug fixes. - + ## 2\. Define your functions in a Python file @@ -111,7 +109,7 @@ Finally, the script starts a UDF server using `UdfServer` and listens for incomi -**INFO** + New sample functions are frequently added to `udf.py`, such as JSONB functions. See the [source file](https://github.com/risingwavelabs/risingwave/blob/main/e2e%5Ftest/udf/test.py). @@ -182,7 +180,7 @@ SELECT * FROM series(5); Due to the limitations of the Python interpreter's [Global Interpreter Lock (GIL)](https://realpython.com/python-gil/), the UDF server can only utilize a single CPU core when processing requests. If you find that the throughput of the UDF server is insufficient, consider scaling out the UDF server. -**INFO** + How to determine if the UDF server needs scaling? @@ -218,7 +216,7 @@ Then, you can start a load balancer, such as Nginx. 
It listens on port 8815 and The RisingWave Python UDF SDK supports the following data types: | SQL Type | Python Type | Notes | -| ---------------- | ------------------------------ | ------------------------------------------------------------------------------ | +| :--------------- | :----------------------------- | :----------------------------------------------------------------------------- | | BOOLEAN | bool | | | SMALLINT | int | | | INT | int | | diff --git a/sql/udfs/use-udfs-in-rust.mdx b/sql/udfs/use-udfs-in-rust.mdx index 15f1b4f3..0de02e33 100644 --- a/sql/udfs/use-udfs-in-rust.mdx +++ b/sql/udfs/use-udfs-in-rust.mdx @@ -179,7 +179,7 @@ SELECT series(5); The following table shows the data type mapping between SQL and Rust: | SQL type | Rust type as argument | Rust type as return value | -| -------------------- | --------------------------- | ------------------------------------------------------ | +| :------------------- | :-------------------------- | :----------------------------------------------------- | | boolean | bool | bool | | smallint | i16 | i16 | | integer | i32 | i32 | diff --git a/troubleshoot/streaming-performance.mdx b/troubleshoot/streaming-performance.mdx index e0931f62..493eb5c4 100644 --- a/troubleshoot/streaming-performance.mdx +++ b/troubleshoot/streaming-performance.mdx @@ -13,7 +13,7 @@ Occasionally, a streaming actor or fragment can become a bottleneck within the o When an actor or fragment performs slower than others, it back-pressures its preceding actors/fragments. Thus, to find the root of backpressure, we need to find the frontmost actors/fragments in the DAG. - + To accomplish this, refer to the Grafana dashboard and navigate to the "Streaming - Backpressure" panel. In the panel, find the channels with high backpressure and identify the frontmost one. diff --git a/troubleshoot/troubleshoot-high-latency.mdx b/troubleshoot/troubleshoot-high-latency.mdx index 1cf61a58..937f3cc5 100644 --- a/troubleshoot/troubleshoot-high-latency.mdx +++ b/troubleshoot/troubleshoot-high-latency.mdx @@ -38,7 +38,7 @@ High latency can be caused by high join amplification. Using low-cardinality columns as equal conditions in joins can result in high join amplification, leading to increased latency. -**INFO** + The term "Cardinality" describes how many distinct values exist in a column. For example, "nation" often has a lower cardinality, while "user\_id" often has a higher cardinality. @@ -77,7 +77,7 @@ ON orders.product_id = product_description.product_id ``` -Suppose `product_id = 1` is a hot-selling product, an update from stream `product_description` with `product_id=1` can match 100K rows from `t1`. +Suppose `product_id = 1` is a hot-selling product, an update from stream `product_description` with `product_id=1` can match 100K rows from `orders`. We can split the MV into multiple MVs: diff --git a/troubleshoot/troubleshoot-oom.mdx b/troubleshoot/troubleshoot-oom.mdx index 80c4f18a..c19116f3 100644 --- a/troubleshoot/troubleshoot-oom.mdx +++ b/troubleshoot/troubleshoot-oom.mdx @@ -59,7 +59,7 @@ If the barrier latency is normal, but the memory usage is still increasing, you We have added a heap profiling utility in the RisingWave Dashboard to help you analyze memory usage and identify memory-related issues. -**INFO** + To enable memory profiling, please set the environment variable `MALLOC_CONF=prof:true` for the compute nodes. 
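For example, a minimal sketch of enabling the flag for a compute node launched from a shell (the actual launch command is omitted because it depends on your deployment; this is an illustration, not the documented procedure):

```bash
# Illustrative only: export the jemalloc profiling flag in the shell that
# starts the compute node, then launch the compute-node process as usual so
# it inherits the variable.
export MALLOC_CONF=prof:true
# ...start the compute node from this shell...
```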
diff --git a/troubleshoot/troubleshoot-source-sink.mdx b/troubleshoot/troubleshoot-source-sink.mdx index eec7fcf5..87f3c52a 100644 --- a/troubleshoot/troubleshoot-source-sink.mdx +++ b/troubleshoot/troubleshoot-source-sink.mdx @@ -28,7 +28,7 @@ _Example of a stuck sink:_ -**INFO** + We are currently rolling out sink decoupling to all sinks. Track the latest progress [here](https://github.com/risingwavelabs/risingwave/issues/17095). diff --git a/typos.toml b/typos.toml new file mode 100644 index 00000000..8add8b6b --- /dev/null +++ b/typos.toml @@ -0,0 +1,19 @@ +[default] +extend-ignore-identifiers-re = [ + # base64 + "\\b[0-9A-Za-z+/]{64}(=|==)?\\b", + # ingest/ingest-from-datagen.md + "\\b[0-9A-Za-z]{16}\\b", +] + +[default.extend-identifiers] +# sql/functions-operators/sql-function-string.md +1b69b4ba630f34e = "1b69b4ba630f34e" + +[default.extend-words] +Iy = "Iy" +YTO = "YTO" +# Azure Kubernetes Service +AKS = "AKS" +# schema.history.internal.skip.unparseable.ddl +unparseable="unparseable" \ No newline at end of file
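To run the same check locally against this configuration (a sketch that assumes the `typos` CLI is installed and that the command is executed from the repository root where `typos.toml` lives):

```bash
# Illustrative only: scan the working tree with the repository's typos config;
# identifiers and words listed above are ignored or pinned as configured.
typos --config typos.toml
```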