Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to hive 4.0.1 #33750

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/trigger_files/IO_Iceberg_Integration_Tests.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run.",
"modification": 1
"modification": 4
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 3
"modification": 4
}
15 changes: 0 additions & 15 deletions .github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,28 +87,13 @@ jobs:
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Setup environment
uses: ./.github/actions/setup-environment-action
with:
java-version: |
8
11
- name: run HCatalog IO build script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :sdks:java:io:hcatalog:build
arguments: |
-PdisableSpotlessCheck=true \
-PdisableCheckStyle=true \
# TODO(https://github.com/apache/beam/issues/32189) remove when embedded hive supports Java11
- name: Test HCatalog IO on Java8
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :sdks:java:io:hcatalog:test
arguments: |
-PdisableSpotlessCheck=true \
-PdisableCheckStyle=true \
-Dfile.encoding=UTF-8 \
-PtestJavaVersion=8 \
-Pjava8Home=$JAVA_HOME_8_X64 \
- name: Archive JUnit Test Results
uses: actions/upload-artifact@v4
if: ${{ !success() }}
Expand Down
15 changes: 0 additions & 15 deletions .github/workflows/beam_PreCommit_Java_IOs_Direct.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,6 @@ jobs:
github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
- name: Setup environment
uses: ./.github/actions/setup-environment-action
with:
java-version: |
8
11
- name: run Java IOs PreCommit script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
Expand All @@ -98,17 +94,6 @@ jobs:
-PdisableSpotlessCheck=true \
-PdisableCheckStyle=true \
-Dfile.encoding=UTF-8 \
# TODO(https://github.com/apache/beam/issues/32189) remove when embedded hive supports Java11
- name: run Java8 IOs PreCommit script
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :sdks:java:io:hcatalog:build
arguments: |
-PdisableSpotlessCheck=true \
-PdisableCheckStyle=true \
-Dfile.encoding=UTF-8 \
-PtestJavaVersion=8 \
-Pjava8Home=$JAVA_HOME_8_X64 \
- name: Archive JUnit Test Results
uses: actions/upload-artifact@v4
if: ${{ !success() }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ public static void verifyDeterministic(Coder<?> target, String message, Iterable
}
}

public static <T> long getEncodedElementByteSizeUsingCoder(Coder<T> target, T value) throws Exception {
public static <T> long getEncodedElementByteSizeUsingCoder(Coder<T> target, T value)
throws Exception {
return target.getEncodedElementByteSize(value);
}
/**
Expand Down
5 changes: 3 additions & 2 deletions sdks/java/extensions/sql/hcatalog/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ applyJavaNature(
],
)

def hive_version = "3.1.3"
def netty_version = "4.1.51.Final"
def hive_version = "4.0.1"
def netty_version = "4.1.82.Final" // in sync with hive-exec's dependency

/*
* We need to rely on manually specifying these evaluationDependsOn to ensure that
Expand All @@ -43,6 +43,7 @@ dependencies {
implementation project(":sdks:java:core")
implementation library.java.vendored_guava_32_1_2_jre

testImplementation library.java.junit
testImplementation project(":sdks:java:io:hcatalog").sourceSets.test.output
// Needed for HCatalogTableProvider tests,
// they use HCat* types
Expand Down
1 change: 0 additions & 1 deletion sdks/java/io/expansion-service/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ dependencies {
// For writing to GCS
runtimeOnly library.java.bigdataoss_gcs_connector
// HiveCatalog
runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:1.4.2")
runtimeOnly project(path: ":sdks:java:io:iceberg:hive")
// BigQueryMetastoreCatalog (Java 11+)
runtimeOnly project(path: ":sdks:java:io:iceberg:bqms", configuration: "shadow")
Expand Down
19 changes: 6 additions & 13 deletions sdks/java/io/hcatalog/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@ applyJavaNature(
description = "Apache Beam :: SDKs :: Java :: IO :: HCatalog"
ext.summary = "IO to read and write for HCatalog source."

// Hive 4.x is compatible with Hadoop 3.x; Hive 3.x has been EOL since Oct 2024
def hadoopVersions = [
"2102": "2.10.2",
"324": "3.2.4",
"336": "3.3.6",
// "341": "3.4.1", // tests already exercised on the default version
]

hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}

def hive_version = "3.1.3"
def hive_version = "4.0.1"

dependencies {
implementation library.java.vendored_guava_32_1_2_jre
Expand All @@ -64,6 +64,10 @@ dependencies {
testImplementation library.java.hamcrest
testImplementation "org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version:tests"
testImplementation "org.apache.hive:hive-exec:$hive_version"
// DataNucleus dependency versions should stay aligned with the managed dependencies of hive-standalone-metastore
testRuntimeOnly 'org.datanucleus:datanucleus-api-jdo:5.2.8'
testRuntimeOnly 'org.datanucleus:datanucleus-rdbms:5.2.10'
testRuntimeOnly 'org.datanucleus:javax.jdo:3.2.0-release'
testImplementation "org.apache.hive:hive-common:$hive_version"
testImplementation "org.apache.hive:hive-cli:$hive_version"
testImplementation "org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version"
Expand Down Expand Up @@ -105,14 +109,3 @@ hadoopVersions.each { kv ->
include '**/*Test.class'
}
}

project.tasks.withType(Test).configureEach {
if (JavaVersion.VERSION_1_8.compareTo(JavaVersion.current()) < 0 && project.findProperty('testJavaVersion') != '8') {
useJUnit {
filter {
excludeTestsMatching "org.apache.beam.sdk.io.hcatalog.HCatalogIOTest"
excludeTestsMatching "org.apache.beam.sdk.io.hcatalog.HCatalogBeamSchemaTest"
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.DriverFactory;
import org.apache.hadoop.hive.ql.IDriver;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
import org.apache.hadoop.hive.ql.session.SessionState;

/**
Expand Down Expand Up @@ -58,11 +58,11 @@ public EmbeddedMetastoreService(String baseDirPath) throws IOException {
String testWarehouseDirPath = makePathASafeFileName(testDataDirPath + "/warehouse");

hiveConf = new HiveConf(getClass());
hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, "");
hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, "");
hiveConf.setVar(HiveConf.ConfVars.PRE_EXEC_HOOKS, "");
hiveConf.setVar(HiveConf.ConfVars.POST_EXEC_HOOKS, "");
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, testWarehouseDirPath);
hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true);
hiveConf.setVar(HiveConf.ConfVars.METASTORE_WAREHOUSE, testWarehouseDirPath);
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_METADATA_QUERIES, true);
hiveConf.setVar(
HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
"org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider");
Expand All @@ -75,9 +75,10 @@ public EmbeddedMetastoreService(String baseDirPath) throws IOException {

/** Executes the passed query on the embedded metastore service. */
public void executeQuery(String query) {
CommandProcessorResponse response = driver.run(query);
if (response.failed()) {
throw new RuntimeException(response.getException());
try {
driver.run(query);
} catch (CommandProcessorException e) {
throw new RuntimeException(e);
}
}

Expand Down
5 changes: 3 additions & 2 deletions sdks/java/io/iceberg/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}
def iceberg_version = "1.6.1"
def parquet_version = "1.12.0"
def orc_version = "1.9.2"
def hive_version = "3.1.3"
def hive_version = "4.0.1"

dependencies {
implementation library.java.vendored_guava_32_1_2_jre
Expand Down Expand Up @@ -74,8 +74,9 @@ dependencies {
// Hive catalog test dependencies
testImplementation project(path: ":sdks:java:io:iceberg:hive")
testImplementation "org.apache.iceberg:iceberg-common:$iceberg_version"
testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
testImplementation ("org.apache.hive:hive-iceberg-catalog:$hive_version")
testImplementation ("org.apache.hive:hive-metastore:$hive_version")
testImplementation ("org.apache.hive:hive-standalone-metastore-server:$hive_version")
testImplementation "org.assertj:assertj-core:3.11.1"
testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
exclude group: "org.apache.hive", module: "hive-exec"
Expand Down
10 changes: 5 additions & 5 deletions sdks/java/io/iceberg/hive/build.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import groovy.json.JsonOutput

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Expand Down Expand Up @@ -27,16 +25,18 @@ applyJavaNature(
description = "Apache Beam :: SDKs :: Java :: IO :: Iceberg :: Hive"
ext.summary = "Runtime dependencies needed for Hive catalog integration."

def hive_version = "3.1.3"
def hive_version = "4.0.1"
def hbase_version = "2.6.1-hadoop3"
def hadoop_version = "3.4.1"
def iceberg_version = "1.6.1"
def avatica_version = "1.25.0"

dependencies {
// dependencies needed to run with iceberg's hive catalog
// these dependencies are going to be included in io-expansion-service
runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
runtimeOnly ("org.apache.hive:hive-iceberg-handler:$hive_version") {
// prefer to use unshaded hive-exec
exclude group: "org.apache.hive", module: "hive-exec"
}
// analyzeClassesDependencies fails with "Cannot accept visitor on URL", likely because the plugin does not recognize the "core" classifier
// use the "core" classifier to depend on the un-shaded jar
runtimeOnly ("org.apache.hive:hive-exec:$hive_version:core") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,12 @@ public void catalogCleanup() throws Exception {

@Override
public Map<String, Object> managedIcebergConfig(String tableId) {
String metastoreUri = hiveMetastoreExtension.hiveConf().getVar(HiveConf.ConfVars.METASTOREURIS);
String metastoreUri =
hiveMetastoreExtension.hiveConf().getVar(HiveConf.ConfVars.METASTORE_URIS);

Map<String, String> confProperties =
ImmutableMap.<String, String>builder()
.put(HiveConf.ConfVars.METASTOREURIS.varname, metastoreUri)
.put(HiveConf.ConfVars.METASTORE_URIS.varname, metastoreUri)
.build();

return ImmutableMap.<String, Object>builder()
Expand Down
Loading
Loading