diff --git a/scripts/k8s_setup/Dockerfile_k8s_custom b/scripts/k8s_setup/Dockerfile_k8s_custom
new file mode 100755
index 00000000..13914aea
--- /dev/null
+++ b/scripts/k8s_setup/Dockerfile_k8s_custom
@@ -0,0 +1,59 @@
+#FROM apache/spark:3.5.1
+FROM apache/spark:3.5.1-scala2.12-java17-python3-r-ubuntu
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Initial user is 'spark'; root is required for the apt/pip installs below
+USER root
+
+# Install Python 3 and pip
+# RUN apt-get update && \
+#     apt-get install -y python3 python3-pip && \
+#     rm -rf /var/lib/apt/lists/*
+
+# RUN sudo apt-get update && \
+#     sudo apt-get install -y python3 python3-pip && \
+#     rm -rf /var/lib/apt/lists/*
+
+# Install python 3.9 --- begin
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends software-properties-common && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends python3.9 python3.9-distutils && \
+    rm -rf /var/lib/apt/lists/*
+
+# Update alternatives to use Python 3.9 as the default python3
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
+
+# Install pip for Python 3.9 (apt lists were purged above, so update again in the same layer)
+RUN apt-get update && apt-get install -y --no-install-recommends wget && \
+    wget https://bootstrap.pypa.io/get-pip.py && \
+    python3.9 get-pip.py && rm -f get-pip.py && rm -rf /var/lib/apt/lists/*
+
+# End --- Install python 3.9
+
+
+# USER spark  # commented now. TODO: check to put back later.
+
+# Set the Python interpreter used by PySpark
+ENV PYSPARK_PYTHON=python3
+
+# Install PySpark and any other required Python packages
+RUN pip3 install --no-cache-dir --upgrade pip && \
+    pip3 install --no-cache-dir pyspark==3.5.1
+# RUN pip3 install numpy pandas
+
+RUN pip3 install --no-cache-dir --no-deps yaetos==0.12.1
+# Version pinned; --no-deps avoids clobbering the Spark image's own packages.
+RUN pip3 install --no-cache-dir -r /usr/local/lib/python3.9/dist-packages/yaetos/scripts/requirements_base.txt
+
+
+USER spark
+
+# Expose the necessary Spark UI ports
+# EXPOSE 4040 7077 8080
+
+# COPY start-spark.sh /opt/start-spark.sh
+# RUN chmod +x /opt/start-spark.sh
diff --git a/scripts/k8s_setup/image_creation_and_registration.sh b/scripts/k8s_setup/image_creation_and_registration.sh
new file mode 100755
index 00000000..204f927d
--- /dev/null
+++ b/scripts/k8s_setup/image_creation_and_registration.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Script to create and register images to AWS ECR. It needs to be executed from the folder containing this script.
+# Usage:
+# - ./image_creation_and_registration.sh creation_repo   # to be done once per repo name
+# - ./image_creation_and_registration.sh create_spark_img
+# - ./image_creation_and_registration.sh login_ECR
+# - ./image_creation_and_registration.sh push_to_ECR
+
+set -e  # abort on the first failing aws/docker command instead of continuing silently
+cwd=$(pwd)  # PUT YOUR PATH if needed.
+shared_folder="${cwd}/../../"
+image_repo=pyspark_img
+image_name=$image_repo:3.5.1
+docker_file=Dockerfile_k8s_custom
+run_mode=$1
+aws_region=us-east-1
+
+if [[ $run_mode = "creation_repo" ]]; then
+    echo 'Creating repository in AWS'
+    aws ecr create-repository --repository-name "$image_repo"
+# -------
+elif [[ $run_mode = "create_spark_img" ]]; then
+    echo 'Create and get in docker'
+    docker build -t "$image_name" -f "$docker_file" .
+    docker run -it \
+        -v "$shared_folder":/mnt/shared_folder \
+        -h spark \
+        -w /mnt/shared_folder/ \
+        "$image_name" \
+        bash
+    # -v $HOME/.aws:/.aws \
+# -------
+elif [[ $run_mode = "login_ECR" ]]; then
+    echo 'Logging docker in to AWS ECR'
+    ACCOUNT_ID=$(aws sts get-caller-identity | jq -r '.Account')
+    aws ecr get-login-password --region "$aws_region" | docker login --username AWS --password-stdin "$ACCOUNT_ID.dkr.ecr.$aws_region.amazonaws.com"
+# -------
+elif [[ $run_mode = "push_to_ECR" ]]; then
+    echo 'Pushing image to AWS ECR'
+    ACCOUNT_ID=$(aws sts get-caller-identity | jq -r '.Account')
+    docker tag "$image_name" "$ACCOUNT_ID.dkr.ecr.$aws_region.amazonaws.com/$image_name"
+    docker push "$ACCOUNT_ID.dkr.ecr.$aws_region.amazonaws.com/$image_name"
+# -------
+else
+    echo 'Incorrect argument, command ignored' >&2; exit 1
+fi