Skip to content

Commit

Permalink
Add presto-native-tests module
Browse files Browse the repository at this point in the history
Co-authored-by: Manoj Negi <[email protected]>
  • Loading branch information
2 people authored and athmaja-n committed Nov 14, 2024
1 parent 5442d1b commit f464733
Show file tree
Hide file tree
Showing 19 changed files with 3,431 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test-other-modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ jobs:
run: |
./mvnw test -T 1 ${MAVEN_TEST} -pl '
!presto-tests,
!presto-native-tests,
!presto-accumulo,
!presto-cassandra,
!presto-hive,
Expand Down
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
<module>presto-test-coverage</module>
<module>presto-hudi</module>
<module>presto-native-execution</module>
<module>presto-native-tests</module>
<module>presto-router</module>
<module>presto-open-telemetry</module>
<module>redis-hbo-provider</module>
Expand Down
31 changes: 31 additions & 0 deletions presto-native-tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Presto Native Tests

This module contains end-to-end tests that run queries from test classes in
the `presto-tests` module with Presto C++ workers. Please build the module
`presto-native-execution` first.

The following command can be used to run all tests in this module:
```
mvn test \
  -pl 'presto-native-tests' \
  -Dtest="com.facebook.presto.nativetests.Test*" \
  -Duser.timezone=America/Bahia_Banderas \
  -DPRESTO_SERVER=${PRESTO_HOME}/presto-native-execution/cmake-build-debug/presto_cpp/main/presto_server \
  -DWORKER_COUNT=${WORKER_COUNT} -T1C
```
Please update JVM argument `PRESTO_SERVER` to point to the Presto C++ worker
binary `presto_server`.

## Adding new tests

Presto C++ currently does not have the same behavior as Presto for certain
queries. This could be because of missing types or missing function signatures,
among other reasons. Such unsupported queries are therefore expected to fail,
and the tests in this module assert that the error message is as expected.

Issues should also be created for the failing queries, so they are documented
and fixed. Please add the tag `presto-native-tests` for these issues.
Once all the failures in a testcase are fixed, the overridden test in this
module should be removed and the testcase in the corresponding base class in
`presto-tests` would be the single source of truth for Presto SQL coverage
tests.
121 changes: 121 additions & 0 deletions presto-native-tests/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Module holding end-to-end tests that run queries from the presto-tests
         test classes against Presto C++ workers (see this module's README). -->

    <parent>
        <groupId>com.facebook.presto</groupId>
        <artifactId>presto-root</artifactId>
        <version>0.290-SNAPSHOT</version>
    </parent>

    <artifactId>presto-native-tests</artifactId>
    <name>presto-native-tests</name>
    <description>Presto Native Tests</description>

    <properties>
        <air.main.basedir>${project.parent.basedir}</air.main.basedir>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
        </dependency>

        <!-- Test utilities of the native execution module. The version follows
             this project's own version (inherited from the parent) so the two
             cannot drift apart when the project version is bumped. -->
        <dependency>
            <groupId>com.facebook.presto</groupId>
            <artifactId>presto-native-execution</artifactId>
            <version>${project.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>com.facebook.presto</groupId>
            <artifactId>presto-common</artifactId>
        </dependency>

        <!-- Presto SPI -->
        <dependency>
            <groupId>com.facebook.presto</groupId>
            <artifactId>presto-spi</artifactId>
        </dependency>

        <dependency>
            <groupId>com.facebook.presto</groupId>
            <artifactId>presto-main</artifactId>
        </dependency>

        <!-- Provides the base test classes that the tests in this module extend. -->
        <dependency>
            <groupId>com.facebook.presto</groupId>
            <artifactId>presto-tests</artifactId>
        </dependency>

        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
        </dependency>

        <dependency>
            <groupId>com.facebook.presto</groupId>
            <artifactId>presto-tpcds</artifactId>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.jetbrains</groupId>
            <artifactId>annotations</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Disable git-commit-id-plugin plugin to allow for running tests without
                 a git checkout -->
            <plugin>
                <groupId>pl.project13.maven</groupId>
                <artifactId>git-commit-id-plugin</artifactId>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
            <!-- Resources and classes listed here are excluded from the
                 duplicate-finder check for this module. -->
            <plugin>
                <groupId>org.basepom.maven</groupId>
                <artifactId>duplicate-finder-maven-plugin</artifactId>
                <configuration>
                    <ignoredResourcePatterns>
                        <ignoredResourcePattern>parquet.thrift</ignoredResourcePattern>
                        <ignoredResourcePattern>about.html</ignoredResourcePattern>
                        <ignoredResourcePattern>mozilla/public-suffix-list.txt</ignoredResourcePattern>
                        <ignoredResourcePattern>iceberg-build.properties</ignoredResourcePattern>
                        <ignoredResourcePattern>org.apache.avro.data/Json.avsc</ignoredResourcePattern>
                    </ignoredResourcePatterns>
                    <ignoredClassPatterns>
                        <ignoredClassPattern>com.esotericsoftware.kryo.*</ignoredClassPattern>
                        <ignoredClassPattern>com.esotericsoftware.minlog.Log</ignoredClassPattern>
                        <ignoredClassPattern>com.esotericsoftware.reflectasm.*</ignoredClassPattern>
                        <ignoredClassPattern>module-info</ignoredClassPattern>
                        <ignoredClassPattern>META-INF.versions.9.module-info</ignoredClassPattern>
                        <ignoredClassPattern>org.apache.avro.*</ignoredClassPattern>
                        <ignoredClassPattern>com.github.benmanes.caffeine.*</ignoredClassPattern>
                        <ignoredClassPattern>org.roaringbitmap.*</ignoredClassPattern>
                    </ignoredClassPatterns>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <configuration>
                    <!-- Fixed 4 GB heap for the forked test JVM; forks are not reused. -->
                    <argLine>-Xms4g -Xmx4g</argLine>
                    <forkCount>1</forkCount>
                    <reuseForks>false</reuseForks>
                    <excludedGroups>remote-function,textfile_reader</excludedGroups>
                    <systemPropertyVariables>
                        <!-- Location of the Presto C++ worker binary. The module README
                             passes -DPRESTO_SERVER=... on the Maven command line to point
                             at a locally built presto_server instead. -->
                        <PRESTO_SERVER>/root/project/build/debug/presto_cpp/main/presto_server</PRESTO_SERVER>
                    </systemPropertyVariables>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.nativetests;

import com.facebook.presto.tests.AbstractTestAggregations;
import org.testng.annotations.Test;

import static java.lang.String.format;

/**
 * Overrides selected tests of {@link AbstractTestAggregations} for runs against
 * Presto C++ workers.
 * <p>
 * Queries that are expected to be rejected are checked with {@code assertQueryFails}
 * against the expected error pattern; all other queries are checked with
 * {@code assertQuery} against a literal expected result.
 */
public abstract class AbstractTestAggregationsNative
        extends AbstractTestAggregations
{
    // Error pattern expected for every query that builds a digest via "<type>_agg".
    private static final String APPROX_DISTRIBUTION_TYPES_UNSUPPORTED_ERROR = ".*Failed to parse type.*digest.*";

    // Error pattern expected for the CAST(... AS CHAR(1000)) queries of the data-size tests.
    private static final String CHAR_1000_UNSUPPORTED_ERROR = "Failed to parse type \\[char\\(1000\\)]";

    // Digest aggregation calls exercised by the statistical digest tests. The "%s" is
    // replaced with the digest type supplied by the "getType" data provider.
    private static final String[] DIGEST_AGGREGATIONS = {
            "%s_agg(CAST(orderkey AS DOUBLE))",
            "%s_agg(CAST(quantity AS DOUBLE))",
            "%s_agg(CAST(orderkey AS DOUBLE), 2)",
            "%s_agg(CAST(quantity AS DOUBLE), 3)",
            "%s_agg(CAST(quantity AS DOUBLE), 4)",
            "%s_agg(CAST(orderkey AS DOUBLE), 2, 0.0001E0)",
            "%s_agg(CAST(quantity AS DOUBLE), 3, 0.0001E0)",
            "%s_agg(CAST(quantity AS DOUBLE), 4, 0.0001E0)",
    };

    /**
     * Checks {@code approx_distinct} over the {@code orders} table for each argument
     * type, both with and without the explicit {@code 0.023} standard error argument.
     * NULL, TIMESTAMP WITH TIME ZONE, TIME, TIME WITH TIME ZONE and CHAR arguments are
     * expected to fail; every other type is expected to return the listed count.
     */
    @Override
    @Test
    public void testApproximateCountDistinct()
    {
        // Dots are escaped so they match literally, and the pattern ends in ".*"
        // (a bare trailing "*" would only quantify the final "t").
        String signatureUnsupportedError = ".*Aggregate function signature is not supported: presto\\.default\\.approx_distinct.*";
        String timeTypeUnsupportedError = "Failed to parse type.*time";
        String charTypeUnsupportedError = "Failed to parse type.*char";
        String tsWithTzTypeUnsupportedError = "Timestamp with Timezone type is not supported in Prestissimo";

        // test NULL (no FROM clause, so these two are spelled out)
        assertQueryFails("SELECT approx_distinct(NULL)", signatureUnsupportedError, true);
        assertQueryFails("SELECT approx_distinct(NULL, 0.023)", signatureUnsupportedError, true);

        // test date
        assertApproxDistinct("orderdate", "SELECT 2372");

        // test timestamp
        assertApproxDistinct("CAST(orderdate AS TIMESTAMP)", "SELECT 2347");

        // test timestamp with time zone
        assertApproxDistinctFails("CAST(orderdate AS TIMESTAMP WITH TIME ZONE)", tsWithTzTypeUnsupportedError);

        // test time
        assertApproxDistinctFails("CAST(from_unixtime(custkey) AS TIME)", timeTypeUnsupportedError);

        // test time with time zone
        assertApproxDistinctFails("CAST(from_unixtime(custkey) AS TIME WITH TIME ZONE)", timeTypeUnsupportedError);

        // test short decimal
        assertApproxDistinct("CAST(custkey AS DECIMAL(18, 0))", "SELECT 990");

        // test long decimal
        assertApproxDistinct("CAST(custkey AS DECIMAL(25, 20))", "SELECT 1013");

        // test real
        assertApproxDistinct("CAST(custkey AS REAL)", "SELECT 982");

        // test bigint
        assertApproxDistinct("custkey", "SELECT 990");

        // test integer
        assertApproxDistinct("CAST(custkey AS INTEGER)", "SELECT 1028");

        // test smallint
        assertApproxDistinct("CAST(custkey AS SMALLINT)", "SELECT 1023");

        // test tinyint
        assertApproxDistinct("CAST((custkey % 128) AS TINYINT)", "SELECT 128");

        // test double
        assertApproxDistinct("CAST(custkey AS DOUBLE)", "SELECT 1014");

        // test varchar
        assertApproxDistinct("CAST(custkey AS VARCHAR)", "SELECT 1036");

        // test char
        assertApproxDistinctFails("CAST(CAST(custkey AS VARCHAR) AS CHAR(20))", charTypeUnsupportedError);

        // test varbinary
        assertApproxDistinct("to_utf8(CAST(custkey AS VARCHAR))", "SELECT 1036");
    }

    /**
     * Asserts that every digest aggregation over {@code lineitem} fails with the
     * digest type-parsing error.
     *
     * @param type digest type prefix supplied by the {@code getType} data provider
     */
    @Override
    @Test(dataProvider = "getType")
    public void testStatisticalDigest(String type)
    {
        for (String aggregation : DIGEST_AGGREGATIONS) {
            assertQueryFails(
                    format("SELECT value_at_quantile(" + aggregation + ", 0.5E0) > 0 FROM lineitem", type),
                    APPROX_DISTRIBUTION_TYPES_UNSUPPORTED_ERROR,
                    true);
        }
    }

    /**
     * Grouped variant of {@link #testStatisticalDigest(String)}: the same digest
     * aggregations, grouped by {@code partkey}, are expected to fail with the digest
     * type-parsing error.
     * <p>
     * Comprehensive correctness testing is done in the TestQuantileDigestAggregationFunction and TestTDigestAggregationFunction
     *
     * @param type digest type prefix supplied by the {@code getType} data provider
     */
    @Override
    @Test(dataProvider = "getType")
    public void testStatisticalDigestGroupBy(String type)
    {
        for (String aggregation : DIGEST_AGGREGATIONS) {
            assertQueryFails(
                    format("SELECT partkey, value_at_quantile(" + aggregation + ", 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
                    APPROX_DISTRIBUTION_TYPES_UNSUPPORTED_ERROR,
                    true);
        }
    }

    /**
     * Asserts that merging per-partkey digests fails with the digest type-parsing error.
     * <p>
     * Comprehensive correctness testing is done in the TestMergeQuantileDigestFunction and TestMergeTDigestFunction
     *
     * @param type digest type prefix supplied by the {@code getType} data provider
     */
    @Override
    @Test(dataProvider = "getType")
    public void testStatisticalDigestMerge(String type)
    {
        String sql = format(
                "SELECT value_at_quantile(merge(%s), 0.5E0) > 0 FROM (SELECT partkey, %s_agg(CAST(orderkey AS DOUBLE)) as %s FROM lineitem GROUP BY partkey)",
                type,
                type,
                type);
        assertQueryFails(sql, APPROX_DISTRIBUTION_TYPES_UNSUPPORTED_ERROR, true);
    }

    /**
     * Asserts that merging per-(partkey, suppkey) digests, regrouped by partkey, fails
     * with the digest type-parsing error.
     * <p>
     * Comprehensive correctness testing is done in the TestMergeQuantileDigestFunction and TestMergeTDigestFunction
     *
     * @param type digest type prefix supplied by the {@code getType} data provider
     */
    @Override
    @Test(dataProvider = "getType")
    public void testStatisticalDigestMergeGroupBy(String type)
    {
        String sql = format(
                "SELECT partkey, value_at_quantile(merge(%s), 0.5E0) > 0 " +
                        "FROM (SELECT partkey, suppkey, %s_agg(CAST(orderkey AS DOUBLE)) as %s FROM lineitem GROUP BY partkey, suppkey)" +
                        "GROUP BY partkey",
                type,
                type,
                type);
        assertQueryFails(sql, APPROX_DISTRIBUTION_TYPES_UNSUPPORTED_ERROR, true);
    }

    /**
     * Checks {@code sum_data_size_for_stats} over {@code orders.comment} for varchar,
     * varbinary, array, map and row inputs, and expects the char(1000) input to fail.
     */
    @Override
    @Test
    public void testSumDataSizeForStats()
    {
        // varchar
        assertQuery("SELECT \"sum_data_size_for_stats\"(comment) FROM orders", "SELECT 787364");

        // char
        // The char(1000) cast is expected to be rejected with a type-parsing error,
        // so no data size is asserted for it.
        assertQueryFails("SELECT \"sum_data_size_for_stats\"(CAST(comment AS CHAR(1000))) FROM orders",
                CHAR_1000_UNSUPPORTED_ERROR, true);

        // varbinary
        assertQuery("SELECT \"sum_data_size_for_stats\"(CAST(comment AS VARBINARY)) FROM orders", "SELECT 787364");

        // array
        assertQuery("SELECT \"sum_data_size_for_stats\"(ARRAY[comment]) FROM orders", "SELECT 847364");
        assertQuery("SELECT \"sum_data_size_for_stats\"(ARRAY[comment, comment]) FROM orders", "SELECT 1634728");

        // map
        assertQuery("SELECT \"sum_data_size_for_stats\"(map(ARRAY[1], ARRAY[comment])) FROM orders", "SELECT 907364");
        assertQuery("SELECT \"sum_data_size_for_stats\"(map(ARRAY[1, 2], ARRAY[comment, comment])) FROM orders", "SELECT 1754728");

        // row
        assertQuery("SELECT \"sum_data_size_for_stats\"(ROW(comment)) FROM orders", "SELECT 847364");
        assertQuery("SELECT \"sum_data_size_for_stats\"(ROW(comment, comment)) FROM orders", "SELECT 1634728");
    }

    /**
     * Checks {@code max_data_size_for_stats} over {@code orders.comment} for varchar
     * and varbinary inputs, and expects the char(1000) input to fail.
     */
    @Override
    @Test
    public void testMaxDataSizeForStats()
    {
        // varchar
        assertQuery("SELECT \"max_data_size_for_stats\"(comment) FROM orders", "select 82");

        // char
        assertQueryFails("SELECT \"max_data_size_for_stats\"(CAST(comment AS CHAR(1000))) FROM orders",
                CHAR_1000_UNSUPPORTED_ERROR, true);

        // varbinary
        assertQuery("SELECT \"max_data_size_for_stats\"(CAST(comment AS VARBINARY)) FROM orders", "select 82");

        // max_data_size_for_stats is not needed for array, map and row
    }

    /**
     * Asserts the expected result of {@code approx_distinct(argument)} over
     * {@code orders}, once with the default error and once with {@code 0.023}.
     */
    private void assertApproxDistinct(String argument, String expected)
    {
        assertQuery("SELECT approx_distinct(" + argument + ") FROM orders", expected);
        assertQuery("SELECT approx_distinct(" + argument + ", 0.023) FROM orders", expected);
    }

    /**
     * Asserts that {@code approx_distinct(argument)} over {@code orders} fails with a
     * message matching {@code expectedError}, once with the default error and once
     * with {@code 0.023}.
     */
    private void assertApproxDistinctFails(String argument, String expectedError)
    {
        assertQueryFails("SELECT approx_distinct(" + argument + ") FROM orders", expectedError, true);
        assertQueryFails("SELECT approx_distinct(" + argument + ", 0.023) FROM orders", expectedError, true);
    }
}
Loading

0 comments on commit f464733

Please sign in to comment.