diff --git a/.github/workflows/build-dev-image.yml b/.github/workflows/build-dev-image.yml
index 419087877..1476e333a 100644
--- a/.github/workflows/build-dev-image.yml
+++ b/.github/workflows/build-dev-image.yml
@@ -11,7 +11,7 @@ jobs:
     steps:
       - uses: actions/checkout@v2
       - name: Set up JDK 21
-        uses: actions/setup-java@v2
+        uses: actions/setup-java@v4
         with:
           distribution: 'temurin'
           java-version: '21'
@@ -57,7 +57,7 @@ jobs:
           build-args: |
             WREN_VERSION=${{ steps.prepare.outputs.WREN_VERSION }}
           push: true
-  build-ibis-image:
+  build-ibis-amd64-image:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -86,6 +86,38 @@ jobs:
           build-contexts: |
             wren-core-py=./wren-core-py
             wren-core=./wren-core
-          platforms: linux/amd64,linux/arm64
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+  build-ibis-arm64-image:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/canner/wren-engine-ibis
+          tags: |
+            type=sha
+            type=raw,value=nightly
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./ibis-server
+          build-contexts: |
+            wren-core-py=./wren-core-py
+            wren-core=./wren-core
+          platforms: linux/arm64
           push: true
           tags: ${{ steps.meta.outputs.tags }}
diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml
index 3adb681a3..cd197bb37 100644
--- a/.github/workflows/build-image.yml
+++ b/.github/workflows/build-image.yml
@@ -29,7 +29,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set up JDK 21
-        uses: actions/setup-java@v2
+        uses: actions/setup-java@v4
         with:
           distribution: 'temurin'
           java-version: '21'
@@ -69,7 +69,7 @@ jobs:
           build-args: |
             WREN_VERSION=${{ steps.prepare.outputs.WREN_VERSION }}
           push: true
-  build-ibis-image:
+  build-ibis-amd64-image:
     needs: prepare-tag
     runs-on: ubuntu-latest
     steps:
@@ -98,6 +98,38 @@ jobs:
           build-contexts: |
             wren-core-py=./wren-core-py
             wren-core=./wren-core
-          platforms: linux/amd64,linux/arm64
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+  build-ibis-arm64-image:
+    needs: prepare-tag
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/canner/wren-engine-ibis
+          tags: |
+            type=raw,value=${{ needs.prepare-tag.outputs.tag_name }}
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./ibis-server
+          build-contexts: |
+            wren-core-py=./wren-core-py
+            wren-core=./wren-core
+          platforms: linux/arm64
           push: true
           tags: ${{ steps.meta.outputs.tags }}
diff --git a/.github/workflows/ibis-ci.yml b/.github/workflows/ibis-ci.yml
index 2cb3cb026..239f964cf 100644
--- a/.github/workflows/ibis-ci.yml
+++ b/.github/workflows/ibis-ci.yml
@@ -65,10 +65,11 @@ jobs:
         run: |
           just install
           just install-core
+          just update-core
       - name: Run tests
        env:
          WREN_ENGINE_ENDPOINT: http://localhost:8080
-        run: poetry run pytest -m "not bigquery and not snowflake and not trino and not canner"
+        run: poetry run pytest -m "not bigquery and not snowflake and not canner"
       - name: Test bigquery if need
        if: contains(github.event.pull_request.labels.*.name, 'bigquery')
        env:
@@ -84,8 +85,3 @@ jobs:
          SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
          SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
        run: just test snowflake
-      - name: Test trino if need
-        if: contains(github.event.pull_request.labels.*.name, 'trino')
-        env:
-          WREN_ENGINE_ENDPOINT: http://localhost:8080
-        run: just test trino
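
For context on the `-m` expression used above: the ibis-server suite gates datasource-specific tests behind pytest markers, so dropping `not trino` from the expression (and removing the labeled trino step) takes that backend out of the default CI run. A minimal sketch of the pattern, with hypothetical test names and assuming the markers are registered in the project's pytest configuration:

```python
import pytest


@pytest.mark.bigquery
def test_bigquery_metadata():  # hypothetical: selected only when 'bigquery' matches the -m filter
    ...


def test_local_query():  # unmarked: always part of the default run
    ...
```

Running `pytest -m "not bigquery and not snowflake and not canner"` skips every marked backend, while a labeled PR can still run `just test bigquery` and friends.
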
diff --git a/.github/workflows/maven-tests.yml b/.github/workflows/maven-tests.yml
index 954821c19..a5dad947b 100644
--- a/.github/workflows/maven-tests.yml
+++ b/.github/workflows/maven-tests.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set up JDK 21
-        uses: actions/setup-java@v2
+        uses: actions/setup-java@v4
         with:
           distribution: 'temurin'
           java-version: '21'
diff --git a/.github/workflows/stable-release.yml b/.github/workflows/stable-release.yml
index b3112f103..d7615c3db 100644
--- a/.github/workflows/stable-release.yml
+++ b/.github/workflows/stable-release.yml
@@ -45,7 +45,7 @@ jobs:
           git push
           echo "value=$version" >> $GITHUB_OUTPUT
       - name: Set up JDK 21
-        uses: actions/setup-java@v2
+        uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: '21'
@@ -72,7 +72,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set up JDK 21
-        uses: actions/setup-java@v2
+        uses: actions/setup-java@v4
         with:
           distribution: 'temurin'
           java-version: '21'
@@ -113,7 +113,7 @@ jobs:
           build-args: |
             WREN_VERSION=${{ steps.prepare.outputs.WREN_VERSION }}
           push: true
-  stable-release-ibis:
+  stable-release-ibis-amd64:
     needs: prepare-version
     runs-on: ubuntu-latest
     steps:
@@ -145,6 +145,41 @@ jobs:
           build-contexts: |
             wren-core-py=./wren-core-py
             wren-core=./wren-core
-          platforms: linux/amd64,linux/arm64
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+  stable-release-ibis-arm64:
+    needs: prepare-version
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/canner/wren-engine-ibis
+          tags: |
+            type=raw,value=${{ needs.prepare-version.outputs.next_version }}
+            type=raw,value=latest
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./ibis-server
+          build-args: |
+            RUST_PROFILE=--release
+          build-contexts: |
+            wren-core-py=./wren-core-py
+            wren-core=./wren-core
+          platforms: linux/arm64
           push: true
           tags: ${{ steps.meta.outputs.tags }}
diff --git a/README.md b/README.md
index 734622187..2547c5316 100644
--- a/README.md
+++ b/README.md
@@ -23,16 +23,6 @@
 
-## Wren AI @ Hacktoberfest 2024 - Oct 1 till Oct 31
-
-[Hacktoberfest 2024](https://hacktoberfest.com/) is here, and we're inviting developers of all levels to join our open-source community. Together, we'll build Wren AI as a friendly community for all.
-
-👉 Learn how to [Win Wren AI Exclusive Swag Pack & Holopin From Digital Ocean Rewards](https://getwren.ai/wren-ai-hacktoberfest-2024)!
-
-[![image](https://github.com/user-attachments/assets/9048d701-a97b-4c6b-b3ed-fc636201f234)](https://getwren.ai/wren-ai-hacktoberfest-2024)
-
----
-
 > Wren Engine is the semantic engine for LLMs, the backbone of the [Wren AI](https://github.com/Canner/WrenAI) project.
diff --git a/ibis-server/app/model/data_source.py b/ibis-server/app/model/data_source.py
index 5e6a7a34e..848ad5e33 100644
--- a/ibis-server/app/model/data_source.py
+++ b/ibis-server/app/model/data_source.py
@@ -120,22 +120,16 @@ def get_druid_connection(info: DruidConnectionInfo) -> BaseBackend:
             path=info.path,
         )
 
-    @staticmethod
-    def get_druid_connection(info: DruidConnectionInfo) -> BaseBackend:
-        return ibis.druid.connect(
-            host=info.host.get_secret_value(),
-            port=int(info.port.get_secret_value()),
-            path=info.path,
-        )
-
-    @staticmethod
-    def get_mssql_connection(info: MSSqlConnectionInfo) -> BaseBackend:
+    @classmethod
+    def get_mssql_connection(cls, info: MSSqlConnectionInfo) -> BaseBackend:
         return ibis.mssql.connect(
             host=info.host.get_secret_value(),
             port=info.port.get_secret_value(),
             database=info.database.get_secret_value(),
             user=info.user.get_secret_value(),
-            password=info.password.get_secret_value(),
+            password=cls._escape_special_characters_for_odbc(
+                info.password.get_secret_value()
+            ),
             driver=info.driver,
             TDS_Version=info.tds_version,
             **info.kwargs if info.kwargs else dict(),
@@ -181,3 +175,7 @@ def get_trino_connection(info: TrinoConnectionInfo) -> BaseBackend:
             user=(info.user and info.user.get_secret_value()),
             password=(info.password and info.password.get_secret_value()),
         )
+
+    @staticmethod
+    def _escape_special_characters_for_odbc(value: str) -> str:
+        return "{" + value.replace("}", "}}") + "}"
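
The brace quoting added in `get_mssql_connection` follows the ODBC connection-string rule: a value containing special characters (`;`, `{`, `}`, spaces) is wrapped in `{...}`, and any literal `}` inside it is doubled. A standalone sketch of the same transformation (sample passwords are illustrative only):

```python
def escape_odbc_value(value: str) -> str:
    # Wrap the value in braces and double any literal '}' so the ODBC driver
    # treats the whole string, semicolons included, as a single token.
    return "{" + value.replace("}", "}}") + "}"


assert escape_odbc_value("{R;3G1/8Al2AniRye") == "{{R;3G1/8Al2AniRye}"
assert escape_odbc_value("pa}ss;word") == "{pa}}ss;word}"
```
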
diff --git a/ibis-server/app/model/metadata/dto.py b/ibis-server/app/model/metadata/dto.py
index 3865f9ade..ca1b1cc2a 100644
--- a/ibis-server/app/model/metadata/dto.py
+++ b/ibis-server/app/model/metadata/dto.py
@@ -75,7 +75,8 @@ class Column(BaseModel):
 
 
 class TableProperties(BaseModel):
-    schema: str | None
+    # To prevent schema shadowing in Pydantic, avoid using schema as a field name
+    schema_: str | None = Field(alias="schema", default=None)
     catalog: str | None
     table: str | None  # only table name without schema or catalog
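
In Pydantic, a field literally named `schema` shadows `BaseModel.schema`, so the DTO stores it as `schema_` and relies on an alias to keep `"schema"` as the wire format. A trimmed sketch (assuming Pydantic v2; the real `TableProperties` carries more fields):

```python
from pydantic import BaseModel, Field


class TableProperties(BaseModel):
    # `schema_` avoids clashing with BaseModel.schema; the alias keeps the
    # JSON key spelled "schema" for both validation and serialization.
    schema_: str | None = Field(alias="schema", default=None)
    catalog: str | None = None
    table: str | None = None


props = TableProperties.model_validate({"schema": "public", "catalog": "db", "table": "orders"})
assert props.schema_ == "public"
assert props.model_dump(by_alias=True)["schema"] == "public"
```
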
"bytea_column": "object", } + @pytest.mark.skip("Wait ibis handle special characters in connection string") def test_query_with_connection_url(manifest_str, mssql: SqlServerContainer): connection_url = _to_connection_url(mssql) response = client.post( @@ -389,4 +393,4 @@ def _to_connection_info(mssql: SqlServerContainer): def _to_connection_url(mssql: SqlServerContainer): info = _to_connection_info(mssql) - return f"mssql://{info['user']}:{info['password']}@{info['host']}:{info['port']}/{info['database']}?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=YES" + return f"mssql://{info['user']}:{urllib.parse.quote_plus(info['password'])}@{info['host']}:{info['port']}/{info['database']}?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=YES" diff --git a/wren-core/core/src/mdl/context.rs b/wren-core/core/src/mdl/context.rs index b87d0690f..cd0c38be3 100644 --- a/wren-core/core/src/mdl/context.rs +++ b/wren-core/core/src/mdl/context.rs @@ -7,7 +7,7 @@ use crate::logical_plan::analyze::model_anlayze::ModelAnalyzeRule; use crate::logical_plan::analyze::model_generation::ModelGenerationRule; use crate::logical_plan::utils::create_schema; use crate::mdl::manifest::Model; -use crate::mdl::{AnalyzedWrenMDL, WrenMDL}; +use crate::mdl::{AnalyzedWrenMDL, SessionStateRef, WrenMDL}; use async_trait::async_trait; use datafusion::arrow::datatypes::SchemaRef; use datafusion::catalog::Session; @@ -41,7 +41,7 @@ use datafusion::optimizer::rewrite_disjunctive_predicate::RewriteDisjunctivePred use datafusion::optimizer::scalar_subquery_to_join::ScalarSubqueryToJoin; use datafusion::optimizer::single_distinct_to_groupby::SingleDistinctToGroupBy; use datafusion::optimizer::unwrap_cast_in_comparison::UnwrapCastInComparison; -use datafusion::optimizer::OptimizerRule; +use datafusion::optimizer::{AnalyzerRule, OptimizerRule}; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; use datafusion::sql::TableReference; @@ -68,20 +68,47 @@ pub async fn create_ctx_with_mdl( let new_state = SessionStateBuilder::new_from_existing( reset_default_catalog_schema.clone().read().deref().clone(), - ) - .with_analyzer_rules(vec![ + ); + + let new_state = if is_local_runtime { + new_state.with_analyzer_rules(analyze_rule_for_local_runtime( + Arc::clone(&analyzed_mdl), + reset_default_catalog_schema.clone(), + )) + // The plan will be executed locally, so apply the default optimizer rules + } else { + new_state + .with_analyzer_rules(analyze_rule_for_unparsing( + Arc::clone(&analyzed_mdl), + reset_default_catalog_schema.clone(), + )) + .with_optimizer_rules(optimize_rule_for_unparsing()) + }; + + let new_state = new_state.with_config(config).build(); + let ctx = SessionContext::new_with_state(new_state); + register_table_with_mdl(&ctx, analyzed_mdl.wren_mdl()).await?; + Ok(ctx) +} + +// Analyzer rules for local runtime +fn analyze_rule_for_local_runtime( + analyzed_mdl: Arc, + session_state_ref: SessionStateRef, +) -> Vec> { + vec![ // expand the view should be the first rule Arc::new(ExpandWrenViewRule::new( Arc::clone(&analyzed_mdl), - Arc::clone(&reset_default_catalog_schema), + Arc::clone(&session_state_ref), )), Arc::new(ModelAnalyzeRule::new( Arc::clone(&analyzed_mdl), - Arc::clone(&reset_default_catalog_schema), + Arc::clone(&session_state_ref), )), Arc::new(ModelGenerationRule::new( Arc::clone(&analyzed_mdl), - reset_default_catalog_schema, + session_state_ref, )), Arc::new(InlineTableScan::new()), // Every rule that will generate [Expr::Wildcard] should be placed in front 
diff --git a/wren-core/core/src/mdl/context.rs b/wren-core/core/src/mdl/context.rs
index b87d0690f..cd0c38be3 100644
--- a/wren-core/core/src/mdl/context.rs
+++ b/wren-core/core/src/mdl/context.rs
@@ -7,7 +7,7 @@ use crate::logical_plan::analyze::model_anlayze::ModelAnalyzeRule;
 use crate::logical_plan::analyze::model_generation::ModelGenerationRule;
 use crate::logical_plan::utils::create_schema;
 use crate::mdl::manifest::Model;
-use crate::mdl::{AnalyzedWrenMDL, WrenMDL};
+use crate::mdl::{AnalyzedWrenMDL, SessionStateRef, WrenMDL};
 use async_trait::async_trait;
 use datafusion::arrow::datatypes::SchemaRef;
 use datafusion::catalog::Session;
@@ -41,7 +41,7 @@ use datafusion::optimizer::rewrite_disjunctive_predicate::RewriteDisjunctivePred
 use datafusion::optimizer::scalar_subquery_to_join::ScalarSubqueryToJoin;
 use datafusion::optimizer::single_distinct_to_groupby::SingleDistinctToGroupBy;
 use datafusion::optimizer::unwrap_cast_in_comparison::UnwrapCastInComparison;
-use datafusion::optimizer::OptimizerRule;
+use datafusion::optimizer::{AnalyzerRule, OptimizerRule};
 use datafusion::physical_plan::ExecutionPlan;
 use datafusion::prelude::SessionContext;
 use datafusion::sql::TableReference;
@@ -68,20 +68,47 @@ pub async fn create_ctx_with_mdl(
     let new_state = SessionStateBuilder::new_from_existing(
         reset_default_catalog_schema.clone().read().deref().clone(),
-    )
-    .with_analyzer_rules(vec![
+    );
+
+    let new_state = if is_local_runtime {
+        new_state.with_analyzer_rules(analyze_rule_for_local_runtime(
+            Arc::clone(&analyzed_mdl),
+            reset_default_catalog_schema.clone(),
+        ))
+        // The plan will be executed locally, so apply the default optimizer rules
+    } else {
+        new_state
+            .with_analyzer_rules(analyze_rule_for_unparsing(
+                Arc::clone(&analyzed_mdl),
+                reset_default_catalog_schema.clone(),
+            ))
+            .with_optimizer_rules(optimize_rule_for_unparsing())
+    };
+
+    let new_state = new_state.with_config(config).build();
+    let ctx = SessionContext::new_with_state(new_state);
+    register_table_with_mdl(&ctx, analyzed_mdl.wren_mdl()).await?;
+    Ok(ctx)
+}
+
+// Analyzer rules for local runtime
+fn analyze_rule_for_local_runtime(
+    analyzed_mdl: Arc<AnalyzedWrenMDL>,
+    session_state_ref: SessionStateRef,
+) -> Vec<Arc<dyn AnalyzerRule + Send + Sync>> {
+    vec![
         // expand the view should be the first rule
         Arc::new(ExpandWrenViewRule::new(
             Arc::clone(&analyzed_mdl),
-            Arc::clone(&reset_default_catalog_schema),
+            Arc::clone(&session_state_ref),
         )),
         Arc::new(ModelAnalyzeRule::new(
             Arc::clone(&analyzed_mdl),
-            Arc::clone(&reset_default_catalog_schema),
+            Arc::clone(&session_state_ref),
        )),
         Arc::new(ModelGenerationRule::new(
             Arc::clone(&analyzed_mdl),
-            reset_default_catalog_schema,
+            session_state_ref,
         )),
         Arc::new(InlineTableScan::new()),
         // Every rule that will generate [Expr::Wildcard] should be placed in front of [ExpandWildcardRule].
@@ -89,19 +116,37 @@ pub async fn create_ctx_with_mdl(
         // [Expr::Wildcard] should be expanded before [TypeCoercion]
         Arc::new(TypeCoercion::new()),
         Arc::new(CountWildcardRule::new()),
-    ]);
-
-    let new_state = if is_local_runtime {
-        // The plan will be executed locally, so apply the default optimizer rules
-        new_state
-    } else {
-        new_state.with_optimizer_rules(optimize_rule_for_unparsing())
-    };
+    ]
+}
-
-    let new_state = new_state.with_config(config).build();
-    let ctx = SessionContext::new_with_state(new_state);
-    register_table_with_mdl(&ctx, analyzed_mdl.wren_mdl()).await?;
-    Ok(ctx)
+
+// Analyzer rules for unparsing
+fn analyze_rule_for_unparsing(
+    analyzed_mdl: Arc<AnalyzedWrenMDL>,
+    session_state_ref: SessionStateRef,
+) -> Vec<Arc<dyn AnalyzerRule + Send + Sync>> {
+    vec![
+        // expand the view should be the first rule
+        Arc::new(ExpandWrenViewRule::new(
+            Arc::clone(&analyzed_mdl),
+            Arc::clone(&session_state_ref),
+        )),
+        Arc::new(ModelAnalyzeRule::new(
+            Arc::clone(&analyzed_mdl),
+            Arc::clone(&session_state_ref),
+        )),
+        Arc::new(ModelGenerationRule::new(
+            Arc::clone(&analyzed_mdl),
+            session_state_ref,
+        )),
+        Arc::new(InlineTableScan::new()),
+        // Every rule that will generate [Expr::Wildcard] should be placed in front of [ExpandWildcardRule].
+        Arc::new(ExpandWildcardRule::new()),
+        // [Expr::Wildcard] should be expanded before [TypeCoercion]
+        Arc::new(TypeCoercion::new()),
+        // Disabled to avoid generating the alias name `count(*)`, because BigQuery doesn't allow
+        // the special character `*` in the alias name
+        // Arc::new(CountWildcardRule::new()),
+    ]
 }
 
 /// Optimizer rules for unparse
diff --git a/wren-core/core/src/mdl/mod.rs b/wren-core/core/src/mdl/mod.rs
index d768caa81..ecf2ca06f 100644
--- a/wren-core/core/src/mdl/mod.rs
+++ b/wren-core/core/src/mdl/mod.rs
@@ -857,6 +857,18 @@ mod test {
         Ok(())
     }
 
+    #[tokio::test]
+    async fn test_disable_count_wildcard_rule() -> Result<()> {
+        let ctx = SessionContext::new();
+
+        let analyzed_mdl = Arc::new(AnalyzedWrenMDL::default());
+        let sql = "select count(*) from (select 1)";
+        let actual =
+            transform_sql_with_ctx(&ctx, Arc::clone(&analyzed_mdl), &[], sql).await?;
+        assert_eq!(actual, "SELECT count(*) FROM (SELECT 1)");
+        Ok(())
+    }
+
     async fn assert_sql_valid_executable(sql: &str) -> Result<()> {
         let ctx = SessionContext::new();
         // To roundtrip testing, we should register the mock table for the planned sql.