Skip to content

Commit

Permalink
[GLUTEN-8398] Bump Celeborn to 0.4.3 and 0.5.2 (#8399)
Browse files Browse the repository at this point in the history
  • Loading branch information
SteNicholas authored Jan 4, 2025
1 parent d63bb7a commit 8ac0c18
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 7 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/velox_backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ jobs:
fail-fast: false
matrix:
spark: [ "spark-3.2" ]
celeborn: [ "celeborn-0.5.2", "celeborn-0.4.2", "celeborn-0.3.2-incubating" ]
celeborn: [ "celeborn-0.5.2", "celeborn-0.4.3", "celeborn-0.3.2-incubating" ]
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
Expand All @@ -566,12 +566,16 @@ jobs:
- name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with ${{ matrix.celeborn }}
run: |
EXTRA_PROFILE=""
if [ "${{ matrix.celeborn }}" = "celeborn-0.4.2" ]; then
if [ "${{ matrix.celeborn }}" = "celeborn-0.4.3" ]; then
EXTRA_PROFILE="-Pceleborn-0.4"
elif [ "${{ matrix.celeborn }}" = "celeborn-0.5.2" ]; then
EXTRA_PROFILE="-Pceleborn-0.5"
fi
echo "EXTRA_PROFILE: ${EXTRA_PROFILE}"
if [ ! -e "/opt/apache-${{ matrix.celeborn }}-bin.tgz" ]; then
echo "WARNING: please pre-install your required package in docker image since the downloading is throttled by this site."
wget -nv https://archive.apache.org/dist/celeborn/${{ matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz -P /opt/
fi
cd /opt && mkdir -p celeborn && \
tar xzf apache-${{ matrix.celeborn }}-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \
mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
Expand Down
2 changes: 1 addition & 1 deletion dev/docker/Dockerfile.centos8-dynamic-build
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ RUN wget --no-check-certificate https://downloads.apache.org/maven/maven-3/3.8.8
ENV PATH=${PATH}:/usr/lib/maven/bin

RUN wget -nv https://archive.apache.org/dist/celeborn/celeborn-0.3.2-incubating/apache-celeborn-0.3.2-incubating-bin.tgz -P /opt/
RUN wget -nv https://archive.apache.org/dist/celeborn/celeborn-0.4.2/apache-celeborn-0.4.2-bin.tgz -P /opt/
RUN wget -nv https://archive.apache.org/dist/celeborn/celeborn-0.4.3/apache-celeborn-0.4.3-bin.tgz -P /opt/
RUN wget -nv https://archive.apache.org/dist/celeborn/celeborn-0.5.2/apache-celeborn-0.5.2-bin.tgz -P /opt/

RUN git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ private void initializeLifecycleManager() {

private <K, V, C> ShuffleHandle registerCelebornShuffleHandle(
int shuffleId, ShuffleDependency<K, V, C> dependency) {
// for Celeborn 0.4.0
CelebornUtils.registerAppShuffleDeterminate(lifecycleManager, shuffleId, dependency);
return CelebornUtils.getCelebornShuffleHandle(
appUniqueId,
lifecycleManager.getHost(),
Expand All @@ -207,9 +209,6 @@ public <K, V, C> ShuffleHandle registerShuffle(
appUniqueId = SparkUtils.appUniqueId(dependency.rdd().context());
initializeLifecycleManager();

// for Celeborn 0.4.0
CelebornUtils.registerAppShuffleDeterminate(lifecycleManager, shuffleId, dependency);

// Note: the app unique id is generated on the driver side; make sure dependency.rdd.context
// is the same SparkContext across different shuffleIds.
// This method may be called many times.
Expand Down Expand Up @@ -307,6 +306,23 @@ public <K, V> ShuffleWriter<K, V> getWriter(
false,
extension);

// for Celeborn 0.5.2
// Reflectively reads the declared field "throwsFetchFailure" of CelebornShuffleHandle and, when
// it is true, looks up and invokes
// SparkUtils.addFailureListenerIfBarrierTask(ShuffleClient, TaskContext, CelebornShuffleHandle).
// NOTE(review): reflection is presumably used because these members do not exist in every
// Celeborn version this module is built against — confirm against the supported profiles.
try {
Field field = CelebornShuffleHandle.class.getDeclaredField("throwsFetchFailure");
// Allow the value to be read even if the field is not public.
field.setAccessible(true);
// NOTE(review): the flag is read from `handle` while the listener is registered with `h`;
// presumably `h` is `handle` cast to CelebornShuffleHandle — confirm in the enclosing method.
boolean throwsFetchFailure = (boolean) field.get(handle);
if (throwsFetchFailure) {
Method addFailureListenerMethod =
SparkUtils.class.getMethod(
"addFailureListenerIfBarrierTask",
ShuffleClient.class,
TaskContext.class,
CelebornShuffleHandle.class);
// A null receiver means the method is invoked as a static method.
addFailureListenerMethod.invoke(null, shuffleClient, context, h);
}
} catch (NoSuchFieldException | NoSuchMethodException ignored) {
// Deliberately ignored: when the field or the method is missing, the listener registration
// is simply skipped and execution continues.
// NOTE(review): IllegalAccessException (Field.get / Method.invoke) and
// InvocationTargetException (Method.invoke) are not caught here; presumably the enclosing
// code handles them — confirm.
}

int shuffleId;

// for Celeborn 0.4.0
Expand Down
2 changes: 1 addition & 1 deletion tools/gluten-it/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
<profile>
<id>celeborn-0.4</id>
<properties>
<celeborn.version>0.4.2</celeborn.version>
<celeborn.version>0.4.3</celeborn.version>
</properties>
</profile>
<profile>
Expand Down

0 comments on commit 8ac0c18

Please sign in to comment.