Skip to content

Commit

Permalink
new script to create docker images and register + new docker
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurprevot committed Jul 24, 2024
1 parent 1a7aff1 commit 8a7265e
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
59 changes: 59 additions & 0 deletions scripts/k8s_setup/Dockerfile_k8s_custom
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Custom Spark image adding Python 3.9 + Yaetos on top of the official Spark base,
# for running Yaetos jobs on Kubernetes.
#FROM apache/spark:3.5.1
FROM apache/spark:3.5.1-scala2.12-java17-python3-r-ubuntu

# Set the working directory in the container
WORKDIR /app

# Initial user of the base image is 'spark'; switch to root for package installs.
USER root

# Install python 3.9 --- begin
# Python 3.9 comes from the deadsnakes PPA (the base image's Ubuntu ships a
# different default python3). wget is installed in this same layer, BEFORE the
# apt lists are removed: installing it in a later layer would fail because the
# package lists would no longer exist (and re-running `apt-get update` there
# would bloat the image). List cleanup happens in the same RUN so the lists
# never persist in any layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        python3.9 \
        python3.9-distutils \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Update alternatives to use Python 3.9 as the default python3
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1

# Install pip for Python 3.9 via the upstream installer; remove the installer
# in the same layer so it doesn't linger in the image.
RUN wget https://bootstrap.pypa.io/get-pip.py && \
    python3.9 get-pip.py && \
    rm -f get-pip.py
# End --- Install python 3.9

# USER spark # commented now. TODO: check to put back later.

# Set the Python interpreter used by PySpark
ENV PYSPARK_PYTHON=python3

# Install PySpark pinned to the same version as the Spark base image.
# --no-cache-dir keeps pip's download cache out of the layer.
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir pyspark==3.5.1
# RUN pip3 install numpy pandas

# Yaetos pinned; --no-deps to force the versions from its requirements file
# below instead of whatever transitive resolution would pick.
RUN pip3 install --no-cache-dir --no-deps yaetos==0.12.1
# Force latest version to avoid using previous ones.
RUN pip3 install --no-cache-dir -r /usr/local/lib/python3.9/dist-packages/yaetos/scripts/requirements_base.txt

# Drop back to the unprivileged user provided by the base image.
USER spark

# Expose the necessary Spark UI ports
# EXPOSE 4040 7077 8080

# COPY start-spark.sh /opt/start-spark.sh
# RUN chmod +x /opt/start-spark.sh
47 changes: 47 additions & 0 deletions scripts/k8s_setup/image_creation_and_registration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Fail fast: abort on any command error, on use of an unset variable, and on
# failures anywhere in a pipeline (matters for the `aws ... | docker login` step).
set -euo pipefail

# Script to create and register images to AWS ECR. It needs to be executed from the folder containing this script
# Usage:
# - ./image_creation_and_registration.sh creation_repo # to be done once per repo name
# - ./image_creation_and_registration.sh create_spark_img
# - ./image_creation_and_registration.sh login_ECR
# - ./image_creation_and_registration.sh push_to_ECR


cwd=$(pwd) # PUT YOUR PATH if needed.
shared_folder="${cwd}/../../"
image_repo=pyspark_img
image_name="${image_repo}:3.5.1"
docker_file=Dockerfile_k8s_custom
run_mode="${1:-}"   # default to empty so a missing argument hits the error branch, not `set -u`
aws_region=us-east-1

if [[ "$run_mode" = "creation_repo" ]]; then
    echo 'Creating repository in AWS'
    aws ecr create-repository --repository-name "$image_repo"
# -------
elif [[ "$run_mode" = "create_spark_img" ]]; then
    echo 'Building the image and opening a shell in the container'
    docker build -t "$image_name" -f "$docker_file" .
    docker run -it \
        -v "$shared_folder":/mnt/shared_folder \
        -h spark \
        -w /mnt/shared_folder/ \
        "$image_name" \
        bash
    # -v $HOME/.aws:/.aws \
# -------
elif [[ "$run_mode" = "login_ECR" ]]; then
    echo 'Logging docker into AWS ECR'
    ACCOUNT_ID=$(aws sts get-caller-identity | jq -r '.Account')
    aws ecr get-login-password --region "$aws_region" | docker login --username AWS --password-stdin "${ACCOUNT_ID}.dkr.ecr.${aws_region}.amazonaws.com"
# -------
elif [[ "$run_mode" = "push_to_ECR" ]]; then
    echo 'Tagging and pushing the image to AWS ECR'
    ACCOUNT_ID=$(aws sts get-caller-identity | jq -r '.Account')
    docker tag "$image_name" "${ACCOUNT_ID}.dkr.ecr.${aws_region}.amazonaws.com/${image_name}"
    docker push "${ACCOUNT_ID}.dkr.ecr.${aws_region}.amazonaws.com/${image_name}"
# -------
else
    echo 'Incorrect argument, command ignored'
    exit 1  # non-zero status so callers/CI can detect the misuse
fi

0 comments on commit 8a7265e

Please sign in to comment.