From 8046dac13874718daa078836d1d263927eabd952 Mon Sep 17 00:00:00 2001 From: DanRunfola Date: Tue, 10 Dec 2024 14:32:36 -0500 Subject: [PATCH] Mlflow tests --- .../images/geoBoundariesBase.Dockerfile | 7 +- geoBoundaryBuilder/k8s_manifests/A_mlflow.yml | 65 +++++++++++++++++++ .../k8s_manifests/C_prefect_workpool.yml | 2 +- geoBoundaryBuilder/test.py | 53 ++++++++------- 4 files changed, 94 insertions(+), 33 deletions(-) create mode 100644 geoBoundaryBuilder/k8s_manifests/A_mlflow.yml diff --git a/geoBoundaryBuilder/images/geoBoundariesBase.Dockerfile b/geoBoundaryBuilder/images/geoBoundariesBase.Dockerfile index 3034e24..e719b6e 100644 --- a/geoBoundaryBuilder/images/geoBoundariesBase.Dockerfile +++ b/geoBoundaryBuilder/images/geoBoundariesBase.Dockerfile @@ -40,16 +40,13 @@ RUN curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o / # Upgrade pip and install Python dependencies RUN pip install --upgrade pip && \ - pip install prefect==3.1.5 kubernetes==25.3.0 + pip install mlflow==2.18.0 # Install geopandas and related dependencies -RUN pip install geopandas==0.13.2 +RUN pip install geopandas==0.13.2 kubernetes==31.0.0 # Install additional Python packages RUN pip install jsonschema==4.19.0 zipfile36==0.1.3 -# Install Prefect Kubernetes components -RUN pip install prefect-kubernetes==0.5.3 - # Set up git-lfs RUN git lfs install diff --git a/geoBoundaryBuilder/k8s_manifests/A_mlflow.yml b/geoBoundaryBuilder/k8s_manifests/A_mlflow.yml new file mode 100644 index 0000000..851620f --- /dev/null +++ b/geoBoundaryBuilder/k8s_manifests/A_mlflow.yml @@ -0,0 +1,65 @@ +apiVersion: v1 +kind: Pod +metadata: + name: mlflow-server + namespace: geoboundaries + labels: + app: mlflow-server +spec: + restartPolicy: Always + securityContext: + runAsUser: 71032 + runAsGroup: 9915 + containers: + - name: mlflow-server + image: "python:3.9-slim" # Use the Python slim image + command: ["/bin/sh", "-c"] + args: + - | + echo "Setting up Python user base..." && \ + export PYTHONUSERBASE=/mlflow/python-user-base && \ + pip install --no-cache-dir --user mlflow && \ + echo "Starting MLflow server..." && \ + /mlflow/python-user-base/bin/mlflow server \ + --backend-store-uri sqlite:///mlflow/mlflow.db \ + --default-artifact-root /mlflow/artifacts \ + --host 0.0.0.0 \ + --port 5000 + ports: + - containerPort: 5000 # MLflow default port + env: + - name: MLFLOW_BACKEND_STORE_URI + value: "sqlite:///mlflow/mlflow.db" + - name: MLFLOW_ARTIFACT_ROOT + value: "/mlflow/artifacts" + volumeMounts: + - name: mlflow-data + mountPath: "/mlflow" + resources: + requests: + ephemeral-storage: "1Gi" + memory: "4Gi" + cpu: "2" + limits: + ephemeral-storage: "2Gi" + memory: "8Gi" + cpu: "4" + volumes: + - name: mlflow-data + nfs: + server: 128.239.59.144 + path: /sciclone/geograd/geoBoundaries/mlflow +--- +apiVersion: v1 +kind: Service +metadata: + name: mlflow-server-service + namespace: geoboundaries +spec: + selector: + app: mlflow-server + type: ClusterIP + ports: + - protocol: TCP + port: 5000 + targetPort: 5000 diff --git a/geoBoundaryBuilder/k8s_manifests/C_prefect_workpool.yml b/geoBoundaryBuilder/k8s_manifests/C_prefect_workpool.yml index 5859527..7b7f584 100644 --- a/geoBoundaryBuilder/k8s_manifests/C_prefect_workpool.yml +++ b/geoBoundaryBuilder/k8s_manifests/C_prefect_workpool.yml @@ -16,7 +16,7 @@ spec: restartPolicy: Always containers: - name: prefect-container - image: "gb-workpool-operator:latest" + image: "ghcr.io/wmgeolab/gb-workpool-operator:latest" env: - name: PREFECT_API_URL value: "http://prefect-server-service.geoboundaries.svc.cluster.local:4200/api" diff --git a/geoBoundaryBuilder/test.py b/geoBoundaryBuilder/test.py index 6ba3302..231b3fc 100644 --- a/geoBoundaryBuilder/test.py +++ b/geoBoundaryBuilder/test.py @@ -1,5 +1,7 @@ import os import subprocess +from prefect import flow +from prefect_kubernetes import KubernetesJob # Use the correct import now that the package is installed # Step 1: Configure Prefect home directory and API URL os.environ["PREFECT_HOME"] = "/tmp/.prefect" # Ensure this path is writable @@ -8,36 +10,33 @@ print(f"Using PREFECT_API_URL: {PREFECT_API_URL}") # Debug output subprocess.run(["prefect", "config", "set", f"PREFECT_API_URL={PREFECT_API_URL}"], check=True) -from prefect import flow -from prefect.context import get_run_context - +# Step 2: Define a simple Prefect flow @flow -def my_flow(): - # Your flow logic here - print("Running my flow on Kubernetes") +def simple_flow(): + print("Hello, Prefect!") + return "Flow Completed" +# Step 3: Configure and deploy the flow to Kubernetes if __name__ == "__main__": - # Get the current script path - script_path = __file__ - - # Define dynamic parameters - image = "ghcr.io/wmgeolab/gb-base:latest" - - # Deploy the flow with dynamic configurations - deployment = my_flow.deploy( - name="dynamic-k8s-flow", - work_pool_name="k8s-gB", - image=image, - job_variables={ - "env": {"EXTRA_PIP_PACKAGES": "your-required-packages"}, - "image_pull_policy": "Always", - "command": [ - "bash", - "-c", - f"pip install -r requirements.txt && python {script_path}" - ] - } + # Define dynamic job variables (e.g., for Kubernetes deployment) + job_variables = { + "image_pull_policy": "Always", + "env": {"EXTRA_PIP_PACKAGES": "prefect kubernetes"} + } + + # Create Kubernetes job for Prefect deployment + k8s_infrastructure = KubernetesJob( + image="python:3.11-slim", # Just a placeholder; no need for GitHub or image build + job_variables=job_variables, + image_pull_policy="Always", + ) + + # Deploy the flow using the created infrastructure + deployment = simple_flow.deploy( + name="simple-flow-k8s-deployment", + work_pool_name="k8s-gB", # Use the existing work pool from Prefect + infrastructure=k8s_infrastructure, ) # Optionally, run the deployment immediately - deployment.run() \ No newline at end of file + deployment.run()