Commit d29281f: Add spark 3.5
julien bignon committed on Sep 5, 2024
1 parent: 71d65ed
Showing 27 changed files with 778 additions and 5 deletions.
@@ -1,5 +1,6 @@
 id: "3.1"
 label: "3.1"
 available: true
-recommended: true
-trustLevel: stable
+recommended: false
+trustLevel: deprecated
+deprecationDate: "2024-09-01T00:00:00Z"
@@ -0,0 +1,22 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Copyright 2019-2021.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import com.bmuschko.gradle.docker.DockerRemoteApiPlugin
import com.saagie.technologies.SaagieTechnologiesGradlePlugin

apply<DockerRemoteApiPlugin>()
apply<SaagieTechnologiesGradlePlugin>()
@@ -0,0 +1,5 @@
id: "3.5"
label: "3.5"
available: true
recommended: true
trustLevel: stable
technologies/job/spark/spark-3.5/innerContexts/jre/Dockerfile
22 changes: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
ARG jre_major
FROM spark:3.5.2-scala2.12-java$jre_major-ubuntu

ENV PATH "$PATH:$SPARK_HOME/bin"
ENV LANG C.UTF-8

# LIGHT DEPENDENCIES START
USER root
RUN apt update -qq && apt install -yqq --no-install-recommends \
    wget curl unzip krb5-user zip && \
    rm -rf /var/lib/apt/lists/*

COPY entrypoint.sh /opt/
RUN chmod 755 /opt/entrypoint.sh

USER spark

# See the Hadoop version used by Spark and update if necessary.
# See https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4 to get the right version of aws-java-sdk-bundle
RUN wget -nv https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar && \
    wget -nv https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar && \
    mv *.jar /opt/spark/jars/
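For reference, a minimal sketch of building this image by hand, assuming the repository layout above (the local tag name is illustrative; the official builds go through the Gradle plugin configured further down):

# Build the JRE 17 variant of the Spark 3.5 image
docker build \
  --build-arg jre_major=17 \
  -t saagie/spark:3.5-jre-17-local \
  technologies/job/spark/spark-3.5/innerContexts/jre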
technologies/job/spark/spark-3.5/innerContexts/jre/context.yaml
18 changes: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
id: java-scala
label: Java/Scala
available: true
trustLevel: stable
job:
  features:
    - type: COMMAND_LINE
      label: Command line
      mandatory: true
      comment: Linux shell command to launch the job.
      defaultValue: "spark-submit \\\n--conf spark.executor.memory=1G \\\n--conf spark.executor.cores=1 \\\n--conf spark.kubernetes.executor.limit.cores=1 \\\n--conf spark.executor.instances=2 \\\n--class=Main {file} arg1 arg2"
    - type: ARTIFACT
      label: Package
      mandatory: true
      comment: "Compatible upload file : .jar"
    - type: SCHEDULER
      label: Scheduled
      mandatory: true
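Unescaped, that defaultValue is the following shell command ({file} is the platform's placeholder for the uploaded artifact):

spark-submit \
--conf spark.executor.memory=1G \
--conf spark.executor.cores=1 \
--conf spark.kubernetes.executor.limit.cores=1 \
--conf spark.executor.instances=2 \
--class=Main {file} arg1 arg2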
technologies/job/spark/spark-3.5/innerContexts/jre/entrypoint.sh
141 changes: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Prevent any errors from being silently ignored
set -eo pipefail

attempt_setup_fake_passwd_entry() {
  # Check whether there is a passwd entry for the container UID
  local myuid; myuid="$(id -u)"
  # If there is no passwd entry for the container UID, attempt to fake one.
  # See https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
  # It's to resolve the OpenShift random UID case.
  # See also: https://github.com/docker-library/postgres/pull/448
  if ! getent passwd "$myuid" &> /dev/null; then
      local wrapper
      for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
        if [ -s "$wrapper" ]; then
          NSS_WRAPPER_PASSWD="$(mktemp)"
          NSS_WRAPPER_GROUP="$(mktemp)"
          export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
          local mygid; mygid="$(id -g)"
          printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
          printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
          break
        fi
      done
  fi
}

if [ -z "$JAVA_HOME" ]; then
  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
fi

SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
for v in "${!SPARK_JAVA_OPT_@}"; do
    SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
done

if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
fi

if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
    export PYSPARK_PYTHON
fi
if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
    export PYSPARK_DRIVER_PYTHON
fi

# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
fi

if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
fi

if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
elif ! [ -z "${SPARK_HOME+x}" ]; then
  SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
fi

# SPARK-43540: add current working directory into executor classpath
SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"

# Switch to spark if no USER specified (root by default) otherwise use USER directly
switch_spark_if_root() {
  if [ "$(id -u)" -eq 0 ]; then
    echo gosu spark
  fi
}

case "$1" in
  driver)
    shift 1
    CMD=(
      "$SPARK_HOME/bin/spark-submit"
      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
      --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
      --deploy-mode client
      "$@"
    )
    attempt_setup_fake_passwd_entry
    # Execute the container CMD under tini for better hygiene
    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
    ;;
  executor)
    shift 1
    CMD=(
      ${JAVA_HOME}/bin/java
      "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
      -Xms"$SPARK_EXECUTOR_MEMORY"
      -Xmx"$SPARK_EXECUTOR_MEMORY"
      -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
      org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
      --driver-url "$SPARK_DRIVER_URL"
      --executor-id "$SPARK_EXECUTOR_ID"
      --cores "$SPARK_EXECUTOR_CORES"
      --app-id "$SPARK_APPLICATION_ID"
      --hostname "$SPARK_EXECUTOR_POD_IP"
      --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
      --podName "$SPARK_EXECUTOR_POD_NAME"
    )
    attempt_setup_fake_passwd_entry
    # Execute the container CMD under tini for better hygiene
    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
    ;;

  *)
    # BEGIN SAAGIE SPECIFIC CODE
    cd /sandbox
    mkdir -p /opt/spark/conf/
    cat conf/*.conf > /opt/spark/conf/spark-defaults.conf
    if test -f main_script;
    then
      CMD=(/bin/sh ./main_script)
      exec "${CMD[@]}"
    else
    # END SAAGIE SPECIFIC CODE
      # Non-spark-on-k8s command provided, proceeding in pass-through mode...
      exec "$@"
    fi;
    ;;
esac
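A quick usage sketch of the dispatch (image tag, jar, and class are illustrative; the fallback branch assumes the Saagie platform's /sandbox directory with a conf/ folder is mounted):

# Driver mode: everything after "driver" is forwarded to spark-submit
docker run --rm -e SPARK_DRIVER_BIND_ADDRESS=0.0.0.0 \
  saagie/spark:3.5-jre-11-1.125.0 \
  driver --class Main local:///opt/spark/examples/jars/spark-examples.jar

# Any other first argument: conf/*.conf is concatenated into spark-defaults.conf,
# then main_script runs if present, otherwise the command is exec'd as-is
docker run --rm -v "$PWD/sandbox:/sandbox" \
  saagie/spark:3.5-jre-11-1.125.0 \
  spark-submit --version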
technologies/job/spark/spark-3.5/innerContexts/jre/spark-3.5-jre-11/build.gradle.kts
32 changes: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Copyright 2019-2021.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import com.bmuschko.gradle.docker.DockerRemoteApiPlugin
import com.saagie.technologies.SaagieTechnologiesGradlePlugin
import com.saagie.technologies.readDockerInfo
import com.saagie.technologies.getVersionForDocker

apply<DockerRemoteApiPlugin>()
apply<SaagieTechnologiesGradlePlugin>()

tasks.withType(com.bmuschko.gradle.docker.tasks.image.DockerBuildImage::class) {
    this.buildArgs.put(
        "jre_major",
        "11"
    )
}
Empty file.
technologies/job/spark/spark-3.5/innerContexts/jre/spark-3.5-jre-11/dockerInfo.yaml
4 changes: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
image: saagie/spark
baseTag: 3.5-jre-11
dynamicVersion: 1.125.0
version: 3.5-jre-11-1.125.0
technologies/job/spark/spark-3.5/innerContexts/jre/spark-3.5-jre-11/image_test.yaml
62 changes: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
schemaVersion: "2.0.0"

metadataTest:
  env:
    - key: LANG
      value: "C.UTF-8"
    - key: JAVA_HOME
      value: "/opt/java/openjdk"
    - key: SPARK_HOME
      value: "/opt/spark"

fileExistenceTests:
  - name: "entrypoint.sh"
    path: "/opt/entrypoint.sh"
    shouldExist: true
    permissions: "-rwxr-xr-x"

  - name: "kinit"
    path: "/usr/bin/kinit"
    shouldExist: true
    permissions: "-rwxr-xr-x"

commandTests:
  - name: "Workdir"
    command: "pwd"
    expectedOutput: ["/opt/spark/work-dir"]

  - name: "Spark version"
    command: "/opt/spark/bin/spark-submit"
    args: ["--version"]
    expectedError: ["version 3.5.*"]

  - name: "krb5-user installation"
    command: "kinit"
    expectedError: ["kinit: Client's credentials have been revoked while getting initial credentials"]
    exitCode: 1

  - name: "wget"
    args: ["--help"]
    command: "wget"
    exitCode: 0

  - name: "curl"
    args: ["--help"]
    command: "curl"
    exitCode: 0

  - name: "unzip"
    args: ["--help"]
    command: "unzip"
    exitCode: 0

  - name: "tar"
    args: ["--help"]
    command: "tar"
    exitCode: 0

  - name: "tini"
    command: "/usr/bin/tini"
    args: ["--version"]
    expectedOutput: ["tini version 0.18.0.*"]
    exitCode: 0
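This schema matches Google's container-structure-test tool; assuming that tool, the image could be verified with (tag taken from the dockerInfo.yaml above):

container-structure-test test \
  --image saagie/spark:3.5-jre-11-1.125.0 \
  --config technologies/job/spark/spark-3.5/innerContexts/jre/spark-3.5-jre-11/image_test.yaml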
technologies/job/spark/spark-3.5/innerContexts/jre/spark-3.5-jre-11/innerContext.yaml
5 changes: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
id: "11"
label: "11"
available: true
trustLevel: stable
recommended: true
technologies/job/spark/spark-3.5/innerContexts/jre/spark-3.5-jre-17/build.gradle.kts
32 changes: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Copyright 2019-2021.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import com.bmuschko.gradle.docker.DockerRemoteApiPlugin
import com.saagie.technologies.SaagieTechnologiesGradlePlugin
import com.saagie.technologies.readDockerInfo
import com.saagie.technologies.getVersionForDocker

apply<DockerRemoteApiPlugin>()
apply<SaagieTechnologiesGradlePlugin>()

tasks.withType(com.bmuschko.gradle.docker.tasks.image.DockerBuildImage::class) {
    this.buildArgs.put(
        "jre_major",
        "17"
    )
}
Empty file.
technologies/job/spark/spark-3.5/innerContexts/jre/spark-3.5-jre-17/dockerInfo.yaml
4 changes: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
image: saagie/spark
baseTag: 3.5-jre-17
dynamicVersion: 1.125.0
version: 3.5-jre-17-1.125.0
technologies/job/spark/spark-3.5/innerContexts/jre/spark-3.5-jre-17/image_test.yaml
62 changes: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
schemaVersion: "2.0.0"

metadataTest:
  env:
    - key: LANG
      value: "C.UTF-8"
    - key: JAVA_HOME
      value: "/opt/java/openjdk"
    - key: SPARK_HOME
      value: "/opt/spark"

fileExistenceTests:
  - name: "entrypoint.sh"
    path: "/opt/entrypoint.sh"
    shouldExist: true
    permissions: "-rwxr-xr-x"

  - name: "kinit"
    path: "/usr/bin/kinit"
    shouldExist: true
    permissions: "-rwxr-xr-x"

commandTests:
  - name: "Workdir"
    command: "pwd"
    expectedOutput: ["/opt/spark/work-dir"]

  - name: "Spark version"
    command: "/opt/spark/bin/spark-submit"
    args: ["--version"]
    expectedError: ["version 3.5.*"]

  - name: "krb5-user installation"
    command: "kinit"
    expectedError: ["kinit: Client's credentials have been revoked while getting initial credentials"]
    exitCode: 1

  - name: "wget"
    args: ["--help"]
    command: "wget"
    exitCode: 0

  - name: "curl"
    args: ["--help"]
    command: "curl"
    exitCode: 0

  - name: "unzip"
    args: ["--help"]
    command: "unzip"
    exitCode: 0

  - name: "tar"
    args: ["--help"]
    command: "tar"
    exitCode: 0

  - name: "tini"
    command: "/usr/bin/tini"
    args: ["--version"]
    expectedOutput: ["tini version 0.19.0.*"]
    exitCode: 0
technologies/job/spark/spark-3.5/innerContexts/jre/spark-3.5-jre-17/innerContext.yaml
5 changes: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
id: "17"
label: "17"
available: true
trustLevel: stable
recommended: true
technologies/job/spark/spark-3.5/innerContexts/python/Dockerfile
40 changes: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
ARG base_img

FROM spark:3.5.2 AS SPARK_BASE

FROM ${base_img} AS BASE_IMG

COPY --from=SPARK_BASE /opt/spark /opt/spark
COPY --from=SPARK_BASE /usr/bin/tini /usr/bin/tini

COPY --from=SPARK_BASE /opt/java/openjdk /opt/java/openjdk

ENV JAVA_HOME /opt/java/openjdk
ENV LANG C.UTF-8
ENV SPARK_HOME /opt/spark

# See https://github.com/apache/spark-docker/blob/master/Dockerfile.template#L19
ARG spark_uid=185

RUN groupadd --system --gid=${spark_uid} spark && \
    useradd --system --uid=${spark_uid} --gid=spark spark

RUN apt update -qq && apt install -yqq --no-install-recommends \
    gosu && \
    rm -rf /var/lib/apt/lists/*

RUN pip --no-cache-dir install --upgrade pip \
    && pip --no-cache-dir install pyspark==3.5.2 \
    && rm -rf /root/.cache \
    && rm -rf /root/.cache/pip \
    && rm -rf ~/.cache/pip

# As long as the base image is from Saagie, no need to add krb5 or LD_LIBRARY_PATH

# Move scripts and frequently changing directives to the end of the build
COPY entrypoint.sh /opt/
RUN chmod 755 /opt/entrypoint.sh

WORKDIR /opt/spark/work-dir

ENTRYPOINT [ "/opt/entrypoint.sh" ]
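A build sketch, assuming base_img points at a Saagie Python image as configured in the build.gradle.kts further down (the local tag name is illustrative):

docker build \
  --build-arg base_img=saagie/python:3.12-1.183.0 \
  -t saagie/spark:3.5-py-3.12-local \
  technologies/job/spark/spark-3.5/innerContexts/python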
technologies/job/spark/spark-3.5/innerContexts/python/context.yaml
22 changes: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
id: python
label: Python
available: true
trustLevel: stable
job:
  features:
    - type: COMMAND_LINE
      label: Command line
      mandatory: true
      comment: Linux shell command to launch the job.
      defaultValue: "spark-submit \\\n--conf spark.executor.memory=1G \\\n--conf spark.executor.cores=1 \\\n--conf spark.kubernetes.executor.limit.cores=1 \\\n--conf spark.executor.instances=2 \\\n--py-files={file} local://__main__.py"
    - type: ARTIFACT
      label: Package
      mandatory: true
      comment: "Compatible upload file : .py or .zip"
    - type: SCHEDULER
      label: Scheduled
      mandatory: true
    - type: AI_DESCRIPTION_GENERATOR
      label: AI description generator enabled
      mandatory: true
      comment: Activation of the AI-based automatic description generation function.
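As with the Java/Scala context, the defaultValue unescapes to the following command ({file} is the platform's placeholder for the uploaded artifact):

spark-submit \
--conf spark.executor.memory=1G \
--conf spark.executor.cores=1 \
--conf spark.kubernetes.executor.limit.cores=1 \
--conf spark.executor.instances=2 \
--py-files={file} local://__main__.py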
technologies/job/spark/spark-3.5/innerContexts/python/entrypoint.sh
182 changes: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
#!/bin/bash
# FROM https://github.com/apache/spark-docker/blob/master/entrypoint.sh.template
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Prevent any errors from being silently ignored
set -eo pipefail

attempt_setup_fake_passwd_entry() {
  # Check whether there is a passwd entry for the container UID
  local myuid; myuid="$(id -u)"
  # If there is no passwd entry for the container UID, attempt to fake one.
  # See https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
  # It's to resolve the OpenShift random UID case.
  # See also: https://github.com/docker-library/postgres/pull/448
  if ! getent passwd "$myuid" &> /dev/null; then
      local wrapper
      for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
        if [ -s "$wrapper" ]; then
          NSS_WRAPPER_PASSWD="$(mktemp)"
          NSS_WRAPPER_GROUP="$(mktemp)"
          export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
          local mygid; mygid="$(id -g)"
          printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
          printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
          break
        fi
      done
  fi
}

if [ -z "$JAVA_HOME" ]; then
  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
fi

# BEGIN SAAGIE SPECIFIC CODE
cd /sandbox
# Parse the command line and, if --py-files is used, extract the file URL so it can be downloaded and injected
if [ -f main_script ] && grep -q "\--py-files" main_script;
then
  PYSPARK_FILES="`grep -Po '.*--py-files=\K[^ ]+' main_script`"
fi;

if [ -n "$PYSPARK_FILES" ]; then
  PYTHONPATH="$PYTHONPATH:$PYSPARK_FILES"
  # Copy and unzip pyfiles
  if [[ $PYSPARK_FILES == *[,]* ]]; then
    echo "PYSPARK_FILES contains comma"
    pyfiles=$(echo $PYSPARK_FILES | tr "," "\n")

    for file in $pyfiles
    do
      echo ">>> [$file]"
      wget -nv $file
    done
  else
    echo ">>> [$PYSPARK_FILES]"
    wget -nv $PYSPARK_FILES
  fi
  if [ -f *.zip ]
  then
    unzip -q *.zip
  fi
  if [ -f "requirements.txt" ]
  then
    pip install -r requirements.txt
  fi
  rm -Rf /opt/spark/work-dir
  ln -s /sandbox/ /opt/spark/work-dir
fi
# END SAAGIE SPECIFIC CODE

SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
for v in "${!SPARK_JAVA_OPT_@}"; do
    SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
done

if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
fi

if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
    export PYSPARK_PYTHON
fi
if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
    export PYSPARK_DRIVER_PYTHON
fi

# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
fi

if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
fi

if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
elif ! [ -z "${SPARK_HOME+x}" ]; then
  SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
fi

# SPARK-43540: add current working directory into executor classpath
SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"

# Switch to spark if no USER specified (root by default) otherwise use USER directly
# SAAGIE: this part is disabled because main_script is only readable by the root user.
switch_spark_if_root() {
  # if [ "$(id -u)" -eq 0 ]; then
  #   echo gosu spark
  # fi
  echo ""
}

case "$1" in
  driver)
    shift 1
    CMD=(
      "$SPARK_HOME/bin/spark-submit"
      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
      --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
      --py-files=/sandbox/* # SAAGIE SPECIFIC CODE
      --deploy-mode client
      "$@"
    )
    attempt_setup_fake_passwd_entry
    # Execute the container CMD under tini for better hygiene
    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
    ;;
  executor)
    shift 1
    CMD=(
      ${JAVA_HOME}/bin/java
      "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
      -Xms"$SPARK_EXECUTOR_MEMORY"
      -Xmx"$SPARK_EXECUTOR_MEMORY"
      -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
      org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
      --driver-url "$SPARK_DRIVER_URL"
      --executor-id "$SPARK_EXECUTOR_ID"
      --cores "$SPARK_EXECUTOR_CORES"
      --app-id "$SPARK_APPLICATION_ID"
      --hostname "$SPARK_EXECUTOR_POD_IP"
      --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
      --podName "$SPARK_EXECUTOR_POD_NAME"
    )
    attempt_setup_fake_passwd_entry
    # Execute the container CMD under tini for better hygiene
    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
    ;;

  *)
    # BEGIN SAAGIE SPECIFIC CODE
    mkdir -p /opt/spark/conf/
    cat conf/*.conf > /opt/spark/conf/spark-defaults.conf
    echo "spark.kubernetes.driver.pod.name $HOSTNAME" >> /opt/spark/conf/spark-defaults.conf
    if test -f main_script;
    then
      CMD=(/bin/sh ./main_script)
      exec "${CMD[@]}"
    else
    # END SAAGIE SPECIFIC CODE
      # Non-spark-on-k8s command provided, proceeding in pass-through mode...
      exec "$@"
    fi;
    ;;
esac
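A quick sketch of what the --py-files extraction in this script yields, using an illustrative main_script line (the URL is hypothetical):

echo 'spark-submit --py-files=https://storage.example.com/bucket/deps.zip local://__main__.py' \
  | grep -Po '.*--py-files=\K[^ ]+'
# prints: https://storage.example.com/bucket/deps.zip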
technologies/job/spark/spark-3.5/innerContexts/python/spark-3.5-python-3.12/build.gradle.kts
35 changes: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Copyright 2019-2021.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import com.bmuschko.gradle.docker.DockerRemoteApiPlugin
import com.saagie.technologies.SaagieTechnologiesGradlePlugin
import com.saagie.technologies.readDockerInfo
import com.saagie.technologies.getVersionForDocker

apply<DockerRemoteApiPlugin>()
apply<SaagieTechnologiesGradlePlugin>()

val dockerInfo = readDockerInfo(projectDir)

tasks.withType(com.bmuschko.gradle.docker.tasks.image.DockerBuildImage::class) {
    this.buildArgs.put(
        "base_img",
        "saagie/python:3.12-1.183.0"
    )
}
Empty file.
technologies/job/spark/spark-3.5/innerContexts/python/spark-3.5-python-3.12/dockerInfo.yaml
4 changes: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
image: saagie/spark
baseTag: 3.5-py-3.12
dynamicVersion: 1.139.0_SDKTECHNO-207
version: 3.5-py-3.12-1.139.0
technologies/job/spark/spark-3.5/innerContexts/python/spark-3.5-python-3.12/image_test.yaml
67 changes: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
schemaVersion: "2.0.0"

metadataTest:
  env:
    - key: LANG
      value: "C.UTF-8"
    - key: JAVA_HOME
      value: "/opt/java/openjdk"
    - key: SPARK_HOME
      value: "/opt/spark"

fileExistenceTests:
  - name: "entrypoint.sh"
    path: "/opt/entrypoint.sh"
    shouldExist: true
    permissions: "-rwxr-xr-x"

  - name: "kinit"
    path: "/usr/bin/kinit"
    shouldExist: true
    permissions: "-rwxr-xr-x"

commandTests:
  - name: "Workdir"
    command: "pwd"
    expectedOutput: ["/opt/spark/work-dir"]

  - name: "Spark version"
    command: "/opt/spark/bin/spark-submit"
    args: ["--version"]
    expectedError: ["version 3.5.*"]

  - name: "python installation"
    command: "which"
    args: ["python"]
    expectedOutput: ["/usr/local/bin/python"]

  - name: "krb5-user installation"
    command: "kinit"
    expectedError: ["kinit: Program lacks support for encryption type while getting initial credentials"]
    exitCode: 1

  - name: "wget"
    args: ["--help"]
    command: "wget"
    exitCode: 0

  - name: "curl"
    args: ["--help"]
    command: "curl"
    exitCode: 0

  - name: "unzip"
    args: ["--help"]
    command: "unzip"
    exitCode: 0

  - name: "tar"
    args: ["--help"]
    command: "tar"
    exitCode: 0

  - name: "tini"
    command: "/usr/bin/tini"
    args: ["--version"]
    expectedOutput: ["tini version 0.18.0.*"]
    exitCode: 0
...nologies/job/spark/spark-3.5/innerContexts/python/spark-3.5-python-3.12/innerContext.yaml
5 changes: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
id: "3.12"
label: "3.12"
available: true
trustLevel: stable
recommended: true