Skip to content

Commit

Permalink
[#5542][#5482] improvement(hadoop-catalog): Update Hadoop version in H…
Browse files Browse the repository at this point in the history
…adoop catalog module from 3.1 to 3.3 (#5553)

### What changes were proposed in this pull request?

Change the version of hadoop common and hadoop client from 3.1.0 to
3.3.0

### Why are the changes needed?

3.1.0 is old and needs to be brought up to date.

Fix: #5542
Fix: #5482

### Does this PR introduce _any_ user-facing change?

N/A.

### How was this patch tested?

Existing CI.
  • Loading branch information
yuqi1129 authored Nov 12, 2024
1 parent 7761440 commit 3ccd89a
Show file tree
Hide file tree
Showing 10 changed files with 101 additions and 51 deletions.
8 changes: 4 additions & 4 deletions bundles/aliyun-bundle/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ dependencies {
compileOnly(libs.hadoop3.common)
implementation(libs.hadoop3.oss)

// oss needs StringUtils from commons-lang or the following error will occur in 3.1.0
// java.lang.NoClassDefFoundError: org/apache/commons/lang/StringUtils
// oss needs StringUtils from commons-lang3 or the following error will occur in 3.3.0
// java.lang.NoClassDefFoundError: org/apache/commons/lang3/StringUtils
// org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystemStore.initialize(AliyunOSSFileSystemStore.java:111)
// org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem.initialize(AliyunOSSFileSystem.java:323)
// org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3611)
implementation(libs.commons.lang)
implementation(libs.commons.lang3)
implementation(project(":catalogs:catalog-common")) {
exclude("*")
}
Expand All @@ -48,7 +48,7 @@ tasks.withType(ShadowJar::class.java) {

// Relocate dependencies to avoid conflicts
relocate("org.jdom", "org.apache.gravitino.shaded.org.jdom")
relocate("org.apache.commons.lang", "org.apache.gravitino.shaded.org.apache.commons.lang")
relocate("org.apache.commons.lang3", "org.apache.gravitino.shaded.org.apache.commons.lang3")
}

tasks.jar {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.io.IOException;
import java.net.URI;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.gravitino.Catalog;
import org.apache.gravitino.NameIdentifier;
import org.apache.gravitino.Schema;
Expand All @@ -36,18 +37,14 @@
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIf;

@Tag("gravitino-docker-test")
@Disabled(
"Disabled due to we don't have a real GCP account to test. If you have a GCP account,"
+ "please change the configuration(YOUR_KEY_FILE, YOUR_BUCKET) and enable this test.")
@EnabledIf(value = "isGCPConfigured", disabledReason = "GCP is not configured.")
public class HadoopGCSCatalogIT extends HadoopCatalogIT {

public static final String BUCKET_NAME = "YOUR_BUCKET";
public static final String SERVICE_ACCOUNT_FILE = "YOUR_KEY_FILE";
public static final String BUCKET_NAME = System.getenv("GCS_BUCKET_NAME");
public static final String SERVICE_ACCOUNT_FILE = System.getenv("GCS_SERVICE_ACCOUNT_JSON_PATH");

@Override
public void startIntegrationTest() throws Exception {
Expand Down Expand Up @@ -172,4 +169,9 @@ public void testCreateSchemaAndFilesetWithSpecialLocation() {
// Delete catalog
metalake.dropCatalog(localCatalogName, true);
}

// Gate for @EnabledIf: run the GCS-backed tests only when both required
// environment variables are supplied with non-blank values.
private static boolean isGCPConfigured() {
    String serviceAccountFile = System.getenv("GCS_SERVICE_ACCOUNT_JSON_PATH");
    String bucketName = System.getenv("GCS_BUCKET_NAME");
    return StringUtils.isNotBlank(serviceAccountFile) && StringUtils.isNotBlank(bucketName);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,19 @@
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIf;
import org.junit.platform.commons.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Disabled(
"Disabled due to we don't have a real OSS account to test. If you have a GCP account,"
+ "please change the configuration(BUCKET_NAME, OSS_ACCESS_KEY, OSS_SECRET_KEY, "
+ "OSS_ENDPOINT) and enable this test.")
@EnabledIf(value = "ossIsConfigured", disabledReason = "OSS is not configured.")
public class HadoopOSSCatalogIT extends HadoopCatalogIT {
private static final Logger LOG = LoggerFactory.getLogger(HadoopOSSCatalogIT.class);
public static final String BUCKET_NAME = "YOUR_BUCKET";
public static final String OSS_ACCESS_KEY = "YOUR_OSS_ACCESS_KEY";
public static final String OSS_SECRET_KEY = "YOUR_OSS_SECRET_KEY";
public static final String OSS_ENDPOINT = "YOUR_OSS_ENDPOINT";
public static final String BUCKET_NAME = System.getenv("OSS_BUCKET_NAME");
public static final String OSS_ACCESS_KEY = System.getenv("OSS_ACCESS_KEY_ID");
public static final String OSS_SECRET_KEY = System.getenv("OSS_SECRET_ACCESS_KEY");
public static final String OSS_ENDPOINT = System.getenv("OSS_ENDPOINT");

@VisibleForTesting
public void startIntegrationTest() throws Exception {}
Expand Down Expand Up @@ -197,4 +195,11 @@ public void testCreateSchemaAndFilesetWithSpecialLocation() {
// Delete catalog
metalake.dropCatalog(localCatalogName, true);
}

// Gate for @EnabledIf: run the OSS-backed tests only when all four OSS
// settings are supplied via environment variables with non-blank values.
//
// Uses a local blank check instead of org.junit.platform.commons.util.StringUtils,
// which is JUnit Platform INTERNAL API and not intended for use by test authors.
protected static boolean ossIsConfigured() {
    return isNotBlank(System.getenv("OSS_ACCESS_KEY_ID"))
        && isNotBlank(System.getenv("OSS_SECRET_ACCESS_KEY"))
        && isNotBlank(System.getenv("OSS_ENDPOINT"))
        && isNotBlank(System.getenv("OSS_BUCKET_NAME"));
}

// True when the value is non-null and contains at least one non-whitespace
// character (same semantics as the previous StringUtils.isNotBlank call).
private static boolean isNotBlank(String value) {
    if (value == null) {
        return false;
    }
    for (int i = 0; i < value.length(); i++) {
        if (!Character.isWhitespace(value.charAt(i))) {
            return true;
        }
    }
    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import org.apache.commons.lang3.JavaVersion;
import org.apache.commons.lang3.SystemUtils;
import org.apache.gravitino.Catalog;
import org.apache.gravitino.NameIdentifier;
import org.apache.gravitino.Schema;
Expand Down Expand Up @@ -65,6 +63,8 @@
import org.junit.jupiter.api.condition.EnabledIf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testcontainers.shaded.org.apache.commons.lang3.JavaVersion;
import org.testcontainers.shaded.org.apache.commons.lang3.SystemUtils;

@Tag("gravitino-docker-test")
public class HadoopUserImpersonationIT extends BaseIT {
Expand Down
15 changes: 12 additions & 3 deletions clients/client-python/tests/integration/test_gvfs_with_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,21 @@
logger = logging.getLogger(__name__)


@unittest.skip("This test require GCS service account key file")
def oss_is_configured():
    """Return True when the GCS integration-test settings are present.

    NOTE(review): despite the name (apparently copy-pasted from the OSS test
    module), this checks *GCS* environment variables; consider renaming to
    ``gcs_is_configured`` together with its use in the skipUnless decorator.
    """
    required = ("GCS_SERVICE_ACCOUNT_JSON_PATH", "GCS_BUCKET_NAME")
    return all(os.environ.get(name) is not None for name in required)


@unittest.skipUnless(oss_is_configured(), "GCS is not configured.")
class TestGvfsWithGCS(TestGvfsWithHDFS):
    # Before running this test, please make sure gcp-bundle-x.jar has been
    # copied to the $GRAVITINO_HOME/catalogs/hadoop/libs/ directory
key_file = "your_key_file.json"
bucket_name = "your_bucket_name"
key_file = os.environ.get("GCS_SERVICE_ACCOUNT_JSON_PATH")
bucket_name = os.environ.get("GCS_BUCKET_NAME")
metalake_name: str = "TestGvfsWithGCS_metalake" + str(randint(1, 10000))

def setUp(self):
Expand Down
21 changes: 16 additions & 5 deletions clients/client-python/tests/integration/test_gvfs_with_oss.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,25 @@
logger = logging.getLogger(__name__)


@unittest.skip("This test require oss service account")
def oss_is_configured():
    """Return True when every OSS setting these tests need is set in the env."""
    required_vars = (
        "OSS_ACCESS_KEY_ID",
        "OSS_SECRET_ACCESS_KEY",
        "OSS_ENDPOINT",
        "OSS_BUCKET_NAME",
    )
    return all(os.environ.get(name) is not None for name in required_vars)


@unittest.skipUnless(oss_is_configured(), "OSS is not configured.")
class TestGvfsWithOSS(TestGvfsWithHDFS):
    # Before running this test, please make sure aliyun-bundle-x.jar has been
    # copied to the $GRAVITINO_HOME/catalogs/hadoop/libs/ directory
oss_access_key = "your_access_key"
oss_secret_key = "your_secret_key"
oss_endpoint = "your_endpoint"
bucket_name = "your_bucket_name"
oss_access_key = os.environ.get("OSS_ACCESS_KEY_ID")
oss_secret_key = os.environ.get("OSS_SECRET_ACCESS_KEY")
oss_endpoint = os.environ.get("OSS_ENDPOINT")
bucket_name = os.environ.get("OSS_BUCKET_NAME")

metalake_name: str = "TestGvfsWithOSS_metalake" + str(randint(1, 10000))

Expand Down
21 changes: 16 additions & 5 deletions clients/client-python/tests/integration/test_gvfs_with_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,25 @@
logger = logging.getLogger(__name__)


@unittest.skip("This test require S3 service account")
def s3_is_configured():
    """Return True when every S3 setting these tests need is set in the env."""
    required_vars = (
        "S3_ACCESS_KEY_ID",
        "S3_SECRET_ACCESS_KEY",
        "S3_ENDPOINT",
        "S3_BUCKET_NAME",
    )
    return all(os.environ.get(name) is not None for name in required_vars)


@unittest.skipUnless(s3_is_configured(), "S3 is not configured.")
class TestGvfsWithS3(TestGvfsWithHDFS):
    # Before running this test, please make sure aws-bundle-x.jar has been
    # copied to the $GRAVITINO_HOME/catalogs/hadoop/libs/ directory
s3_access_key = "your_access_key"
s3_secret_key = "your_secret_key"
s3_endpoint = "your_endpoint"
bucket_name = "your_bucket_name"
s3_access_key = os.environ.get("S3_ACCESS_KEY_ID")
s3_secret_key = os.environ.get("S3_SECRET_ACCESS_KEY")
s3_endpoint = os.environ.get("S3_ENDPOINT")
bucket_name = os.environ.get("S3_BUCKET_NAME")

metalake_name: str = "TestGvfsWithS3_metalake" + str(randint(1, 10000))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.gravitino.Catalog;
import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils;
import org.apache.gravitino.gcs.fs.GCSFileSystemProvider;
Expand All @@ -36,17 +37,16 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.condition.EnabledIf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Disabled(
"Disabled due to we don't have a real GCP account to test. If you have a GCP account,"
+ "please change the configuration(YOUR_KEY_FILE, YOUR_BUCKET) and enable this test.")
@EnabledIf(value = "isGCPConfigured", disabledReason = "GCP is not configured")
public class GravitinoVirtualFileSystemGCSIT extends GravitinoVirtualFileSystemIT {
private static final Logger LOG = LoggerFactory.getLogger(GravitinoVirtualFileSystemGCSIT.class);

public static final String BUCKET_NAME = "YOUR_BUCKET";
public static final String SERVICE_ACCOUNT_FILE = "YOUR_KEY_FILE";
public static final String BUCKET_NAME = System.getenv("GCS_BUCKET_NAME");
public static final String SERVICE_ACCOUNT_FILE = System.getenv("GCS_SERVICE_ACCOUNT_JSON_PATH");

@BeforeAll
public void startIntegrationTest() {
Expand Down Expand Up @@ -141,4 +141,9 @@ protected String genStorageLocation(String fileset) {
@Disabled(
"GCS does not support append, java.io.IOException: The append operation is not supported")
public void testAppend() throws IOException {}

// Gate for @EnabledIf: run the GCS-backed GVFS tests only when both required
// environment variables are supplied with non-blank values.
private static boolean isGCPConfigured() {
    String serviceAccountFile = System.getenv("GCS_SERVICE_ACCOUNT_JSON_PATH");
    String bucketName = System.getenv("GCS_BUCKET_NAME");
    return StringUtils.isNotBlank(serviceAccountFile) && StringUtils.isNotBlank(bucketName);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,19 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.condition.EnabledIf;
import org.junit.platform.commons.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Disabled(
"Disabled due to we don't have a real OSS account to test. If you have a GCP account,"
+ "please change the configuration(BUCKET_NAME, OSS_ACCESS_KEY, OSS_SECRET_KEY, OSS_ENDPOINT) and enable this test.")
@EnabledIf(value = "ossIsConfigured", disabledReason = "OSS is not prepared")
public class GravitinoVirtualFileSystemOSSIT extends GravitinoVirtualFileSystemIT {
private static final Logger LOG = LoggerFactory.getLogger(GravitinoVirtualFileSystemOSSIT.class);

public static final String BUCKET_NAME = "YOUR_BUCKET";
public static final String OSS_ACCESS_KEY = "YOUR_OSS_ACCESS_KEY";
public static final String OSS_SECRET_KEY = "YOUR_OSS_SECRET_KEY";
public static final String OSS_ENDPOINT = "YOUR_OSS_ENDPOINT";
public static final String BUCKET_NAME = System.getenv("OSS_BUCKET_NAME");
public static final String OSS_ACCESS_KEY = System.getenv("OSS_ACCESS_KEY_ID");
public static final String OSS_SECRET_KEY = System.getenv("OSS_SECRET_ACCESS_KEY");
public static final String OSS_ENDPOINT = System.getenv("OSS_ENDPOINT");

@BeforeAll
public void startIntegrationTest() {
Expand Down Expand Up @@ -149,4 +149,11 @@ protected String genStorageLocation(String fileset) {
@Disabled(
"OSS does not support append, java.io.IOException: The append operation is not supported")
public void testAppend() throws IOException {}

// Gate for @EnabledIf: run the OSS-backed GVFS tests only when all four OSS
// settings are supplied via environment variables with non-blank values.
//
// Uses a local blank check instead of org.junit.platform.commons.util.StringUtils,
// which is JUnit Platform INTERNAL API and not intended for use by test authors.
protected static boolean ossIsConfigured() {
    return isNotBlank(System.getenv("OSS_ACCESS_KEY_ID"))
        && isNotBlank(System.getenv("OSS_SECRET_ACCESS_KEY"))
        && isNotBlank(System.getenv("OSS_ENDPOINT"))
        && isNotBlank(System.getenv("OSS_BUCKET_NAME"));
}

// True when the value is non-null and contains at least one non-whitespace
// character (same semantics as the previous StringUtils.isNotBlank call).
private static boolean isNotBlank(String value) {
    if (value == null) {
        return false;
    }
    for (int i = 0; i < value.length(); i++) {
        if (!Character.isWhitespace(value.charAt(i))) {
            return true;
        }
    }
    return false;
}
}
6 changes: 3 additions & 3 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ airlift-json = "237"
airlift-resolver = "1.6"
hive2 = "2.3.9"
hadoop2 = "2.10.2"
hadoop3 = "3.1.0"
hadoop3 = "3.3.0"
hadoop3-gcs = "1.9.4-hadoop3"
hadoop3-aliyun = "3.1.0"
hadoop-minikdc = "3.3.6"
hadoop3-aliyun = "3.3.0"
hadoop-minikdc = "3.3.0"
htrace-core4 = "4.1.0-incubating"
httpclient5 = "5.2.1"
mockserver = "5.15.0"
Expand Down

0 comments on commit 3ccd89a

Please sign in to comment.