diff --git a/Dockerfile b/Dockerfile index 6af603ab0..fdfcc23ab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,6 +53,20 @@ RUN python3 -m venv --system-site-packages dev-packages \ && dev-packages/bin/pip3 install -U --no-cache-dir pip \ && dev-packages/bin/pip3 install -r requirements.dev.txt +RUN apt-get update +RUN apt-get install -y curl ca-certificates apt-transport-https gnupg +RUN curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - +RUN touch /etc/apt/sources.list.d/kubernetes.list +RUN echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list +RUN apt-get update +RUN apt-get install -y kubectl +RUN apt-get install -y awscli +RUN apt-get install -y iputils-ping +RUN apt install iproute2 -y + +# RUN echo "kind-control-plane host.docker.internal" > /etc/host.aliases +# RUN echo "export HOSTALIASES=/etc/host.aliases" >> /etc/profile + USER controlpanel COPY controlpanel controlpanel COPY docker docker diff --git a/Makefile.local.mk b/Makefile.local.mk index ffd8b60b9..815db1739 100644 --- a/Makefile.local.mk +++ b/Makefile.local.mk @@ -1,33 +1,65 @@ all: help +## docker-login: Authenticate docker with ECR +docker-login: + aws-vault exec admin-data -- aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin $(REGISTRY) + ## dev-prepare-up: Run migration before doing up dev-prepare-up: docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml run migration +## local-prepare-up: Run migration before doing up +local-prepare-up: + docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml run migration + +## local-daemon: Startup with docker process in background (to stop afterwards use make clean) +local-daemon: local-prepare-up + docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml up -d frontend_eks + ## dev-daemon: Startup with docker process in background (to stop afterwards use make clean) dev-daemon: dev-prepare-up docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml up -d frontend +## local-fg: Startup with docker process in foreground +local-fg: local-prepare-up + docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml up frontend_eks + ## dev-fg: Startup with docker process in foreground dev-fg: dev-prepare-up docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml up frontend -## dev-debug: Startup clean docker process in background, and docker attach to foreground for debugging -dev-debug: clean dev-daemon - docker attach $(shell sh -c "docker-compose ps -q frontend") +# ## dev-debug: Startup clean docker process in background, and docker attach to foreground for debugging +# dev-debug: clean dev-daemon +# docker attach $(shell sh -c "docker-compose ps -q frontend") + +## local-attach: Attach to existing running background docker process for purposes of debugging +local-attach: + docker attach $(shell sh -c "docker-compose ps -q frontend_eks") ## dev-attach: Attach to existing running background docker process for purposes of debugging dev-attach: docker attach $(shell sh -c "docker-compose ps -q frontend") +## local-py: Start django shell (in the dev-packages context) in new container +local-py: + docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml run frontend_eks sh -c "dev-packages/bin/python manage.py shell" + ## dev-py: Start django shell (in the dev-packages context) in new container dev-py: docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml run frontend sh -c 
"dev-packages/bin/python manage.py shell" +## local-run: Start shell in new copy of container +local-run: + docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml run --rm frontend_eks bash + ## dev-run: Start shell in new copy of container dev-run: docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml run --rm frontend bash +## local-exec: Exec into shell of existing container +local-exec: + docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml exec frontend_eks bash + ## dev-exec: Exec into shell of existing container dev-exec: docker-compose -f docker-compose.yaml -f docker-compose.dev.yaml exec frontend bash diff --git a/README.md b/README.md index 300069b86..df740754d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ +# KIND SPIKE BRANCH + +For details check [here](./current_status_of_spike.md) + + [![Docker Repository on Quay](https://quay.io/repository/mojanalytics/control-panel/status "Docker Repository on Quay")](https://quay.io/repository/mojanalytics/control-panel) # Analytical Platform Control Panel diff --git a/controlpanel/api/cluster.py b/controlpanel/api/cluster.py index 43bd85c54..9da26c762 100644 --- a/controlpanel/api/cluster.py +++ b/controlpanel/api/cluster.py @@ -60,19 +60,19 @@ def _init_user(self): f"Username={self.user.slug}" ), ) - helm.upgrade_release( - f"provision-user-{self.user.slug}", # release - f"{settings.HELM_REPO}/provision-user", # chart - f"--namespace={self.k8s_namespace}", - f"--set=" - + ( - f"Username={self.user.slug}," - f"Efsvolume={settings.EFS_VOLUME}," - f"OidcDomain={settings.OIDC_DOMAIN}," - f"Email={self.user.email}," - f"Fullname={self.user.name}," - ), - ) + # helm.upgrade_release( + # f"provision-user-{self.user.slug}", # release + # f"{settings.HELM_REPO}/provision-user", # chart + # f"--namespace={self.k8s_namespace}", + # f"--set=" + # + ( + # f"Username={self.user.slug}," + # f"Efsvolume={settings.EFS_VOLUME}," + # f"OidcDomain={settings.OIDC_DOMAIN}," + # f"Email={self.user.email}," + # f"Fullname={self.user.name}," + # ), + # ) else: helm.upgrade_release( f"init-user-{self.user.slug}", # release @@ -485,6 +485,7 @@ def get_deployments( k8s = KubernetesClient(id_token=id_token) results = k8s.AppsV1Api.list_namespaced_deployment(user.k8s_namespace) for deployment in results.items: + # breakpoint() app_name = deployment.metadata.labels["app"] _, version = deployment.metadata.labels["chart"].rsplit("-", 1) if search_name and search_name not in app_name: diff --git a/controlpanel/develop/urls.py b/controlpanel/develop/urls.py index eaa3318a0..41cf2b23f 100644 --- a/controlpanel/develop/urls.py +++ b/controlpanel/develop/urls.py @@ -1,8 +1,8 @@ from django.urls import path -from .views import develop_index +from .views import is_kube_connected_view urlpatterns = [ - path("", develop_index, name="develop_index"), + path("kube_connected/", is_kube_connected_view, name="is_kube_connected"), ] diff --git a/controlpanel/develop/views.py b/controlpanel/develop/views.py index dcd288c60..8448516c8 100644 --- a/controlpanel/develop/views.py +++ b/controlpanel/develop/views.py @@ -1,14 +1,46 @@ +import json +import subprocess +from os import environ from typing import List +from kubernetes import client, config + +from controlpanel.api.models import User from django.contrib.auth.decorators import login_required from django.http import HttpResponse from django.shortcuts import render +from rest_framework.decorators import api_view, permission_classes +from rest_framework.permissions import AllowAny +from 
rest_framework.response import Response
+
+
+def run_command(command, *args):
+    env = environ.copy()
+    bits = command.split()
+    command = bits[0]
+    args = bits[1:]
+    output = subprocess.Popen(
+        [command, *args],
+        stderr=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        encoding="utf8",
+        env=env,
+    )
+    out, err = output.communicate()
+    return out, err
+
 
 def installed_tools(username: str) -> List[str]:
     # TODO: Get a list of this user's installed tools and return
     # a list of string ["like", "this"]
+    user = User.objects.get(username=username)
+    raw_cmd = f"kubectl get tools -n user-{username} -o json"
+    raw_bits = raw_cmd.split()
+    command = raw_bits[0]
+    args = raw_bits[1:]
+    out, err = run_command(command, *args)
+    breakpoint()
     return []
 
 
@@ -18,27 +50,13 @@ def user_selected_tool(username: str, toolname: str) -> str:
     return f"Install {toolname} for {username}"
 
 
-@login_required()
-def develop_index(request):
-    status = None
-    tool = None
-
-    if request.method == "POST":
-        data = request.POST
-
-        tool = data.get("tool", "")
-        if not tool:
-            status = "No tool selected"
-        else:
-            status = user_selected_tool(request.user, tool)
-
-    return render(
-        request,
-        "develop/index.html",
-        {
-            "username": request.user,
-            "status": status,
-            "tool": tool,
-            "installed_tools": installed_tools(request.user),
-        },
-    )
+# @login_required()
+@api_view()
+@permission_classes([AllowAny])
+def is_kube_connected_view(request):
+    config.load_kube_config()
+
+    v1 = client.CoreV1Api()
+    services = v1.list_namespaced_service("default")
+    # breakpoint()
+    return Response(services.to_dict())
\ No newline at end of file
diff --git a/controlpanel/urls.py b/controlpanel/urls.py
index fc012bccb..ec2c0cacb 100644
--- a/controlpanel/urls.py
+++ b/controlpanel/urls.py
@@ -16,7 +16,7 @@
     path("metrics", exports.ExportToDjangoView, name="prometheus-django-metrics"),
 ]
 
-if "controlpanel.develop" in settings.INSTALLED_APPS:
+if "controlpanel.develop" in settings.INSTALLED_APPS and settings.DEBUG:
     urlpatterns += [
         path("develop/", include('controlpanel.develop.urls')),
     ]
diff --git a/current_status_of_spike.md b/current_status_of_spike.md
new file mode 100644
index 000000000..379f6ae2d
--- /dev/null
+++ b/current_status_of_spike.md
@@ -0,0 +1,200 @@
+# ANPL-857 removal of dependency on external cluster
+
+The aim was to use a local [kind](https://kind.sigs.k8s.io/) cluster instead of dev-kops (or now dev-eks).
+
+## What we wanted to achieve
+
+Currently our "development" process uses the dev cluster in AWS (either the old KOPS cluster or the new EKS one), which is not ideal for many reasons.
+
+The task was to achieve the following:
+- Controlpanel would talk to the kind cluster instead of the remote one
+- You could authenticate a new user, and the relevant charts would be run in the kind cluster
+- You could successfully deploy a tool in the kind cluster
+
+At this stage we didn't intend to interact with the tool, so ingress wasn't needed; we just wanted the tool to spin up successfully.
+
+## What we have achieved
+
+- Controlpanel is talking to the kind cluster.
+- You can authenticate a user and their namespace is created, but you cannot run the `provision-user` chart.
+- You can see the tools to deploy and call for a deployment, but the deployment currently fails.
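+
+A quick way to sanity-check the first point is the diagnostic endpoint added on this branch. This is only a sketch: it assumes the dev server is running on http://localhost:8000 with `DEBUG` enabled (the develop URLs are only mounted in debug mode) and that the kind kubeconfig has been copied into place (see the setup section below).
+
+```bash
+# Should return a JSON dump of the services in the kind cluster's default namespace
+curl http://localhost:8000/develop/kube_connected/
+```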
+
+## Pre-getting started setup
+
+Currently this is based on also having [`tools-controller-poc`](https://github.com/jasonBirchall/tools-controller-poc) set up.
+
+### Go versions warning
+You will currently need to use Go 1.17.
+
+If you're on a Mac, you can install it using:
+```bash
+brew install go@1.17
+# You'll need to symlink it into your path because it's a keg-only formula, for example
+ln -s /usr/local/opt/go@1.17/bin/go ~/bin/go
+```
+
+### Set up the kind cluster
+```bash
+# checkout in a new directory
+git clone https://github.com/jasonBirchall/tools-controller-poc
+cd tools-controller-poc/
+# This instantiates the cluster
+./create-kind.sh
+# Copy config into place for controlpanel to pick up
+kind get kubeconfig --internal > ~/.kube/controlpanel
+```
+
+## Getting started
+
+We've created a set of make commands that mirror their dev-* counterparts:
+
+| make              | effect                         |
+| ----------------- | -------------------------------|
+| local-prepare-up  | Run migration before doing up (run automatically) |
+| local-daemon      | Startup with docker process in background (to stop afterwards use make clean) |
+| local-fg          | Startup with docker process in foreground |
+| local-attach      | Attach to existing running background controlpanel docker process for purposes of debugging |
+| local-py          | Start django shell (in the dev-packages context) in new container |
+| local-run         | Start shell in new copy of controlpanel container |
+| local-exec        | Exec into shell of existing controlpanel container |
+
+So, to build and run:
+
+```bash
+# Authenticate with ECR
+make docker-login
+
+# Build the containers
+make eks
+
+# bring up in foreground
+aws-vault exec admin-dev -- make local-fg
+```
+
+You should now be able to go to http://localhost:8000/ and log in via Auth0.
+
+### If it goes wrong on the login
+| Error message | What this means |
+| ------------- | ----------------|
+| 'no oidc credentials' | You've not run aws-vault exec before the make command |
+| 'APIException' on the /tools/ page | You've not copied over the kube config from kind |
+| 'MaxRetryError at /tools/' | You've created a new kind cluster and not updated your .kube/controlpanel file |
+
+### Getting to the tools page and nothing is there
+
+Once you've logged in you'll get the following view:
+
+![Tools page with missing tools](./doc/images/no-tools-page.png "Missing tools")
+
+All the fixture-based tool releases are for the KOPS cluster, and you're now using the EKS container/setup, which means none of the default tool releases will show up.
+
+To fix this, log in via local-py and update the tool releases:
+
+```bash
+# Log in to the python shell
+make local-py
+# Import the Tool release model
+In [1]: from controlpanel.api.models import Tool
+# Update the tools to be EKS
+In [2]: Tool.objects.update(target_infrastructure=Tool.EKS)
+```
+
+![Tools page](./doc/images/all-the-tools.png "The tools you were missing")
+
+Now reload the page and you'll see some shiny tools (you might need to restart the server).
+
+_N.B. these are archaic versions of the tools and should in no way be expected to work, even if deployment was working._
+
+You can click deploy and it will try to deploy, but unfortunately you'll get a failed response back, which is as far as we've got; we can now move on to the "What isn't working" section.
+
+## What isn't working
+
+### Account creation
+
+Currently we've disabled the `provision-user` chart, because the chart needs access to S3 and IAM to instantiate the user and give them credentials. This was done by commenting out lines in [controlpanel/api/cluster.py](https://github.com/ministryofjustice/analytics-platform-control-panel/blob/5a5d0db63c77aea8b8aad419d829f76555a7f4f3/controlpanel/api/cluster.py#L63-L75).
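+
+If you want to confirm what did run for a user, one option is to list the helm releases for that user. This is only a sketch: it assumes helm is installed on your host and your current kubectl context points at the kind cluster (kind sets this up when the cluster is created); the username is illustrative.
+
+```bash
+# Releases for the user (the provision-user-<username> release should be
+# absent while that chart is commented out)
+helm list --all-namespaces | grep ladyrassilon
+```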
+
+### Doing a deployment
+
+It's not always easy to surface what has gone wrong, because the deployment happens on the worker rather than the frontend container; when you try to deploy, all you get back is a failure response.
+
+Here is the error message from my worker when trying to deploy Jupyter Lab:
+```
+{'event': 'Missing value for helm chart param release - jupyter-lab-ladyrassilon version - 0.4.3 namespace - user-ladyrassilon, key name - toolsDomain', 'timestamp': '2022-04-21T13:48:03.769214Z', 'logger': 'controlpanel.api.cluster', 'level': 'warning'}
+{'event': 'Release "jupyter-lab-ladyrassilon" does not exist. Installing it now.', 'timestamp': '2022-04-21T13:48:15.906479Z', 'logger': 'controlpanel.api.models.tool', 'level': 'error'}
+{'event': 'Error: unable to build kubernetes objects from release manifest: [unable to recognize "": no matches for kind "Deployment" in version "apps/v1beta2", unable to recognize "": no matches for kind "Ingress" in version "extensions/v1beta1"]', 'timestamp': '2022-04-21T13:48:15.907187Z', 'logger': 'controlpanel.api.models.tool', 'level': 'error'}
+{'event': 'Failed deploying Jupyter Lab for ladyrassilon', 'timestamp': '2022-04-21T13:48:16.969753Z', 'logger': 'controlpanel.frontend.consumers', 'level': 'warning'}
+```
+
+It's not complaining about the release itself; the lines after that are the ones to look at: the chart is built against API versions (`apps/v1beta2`, `extensions/v1beta1`) that no longer exist in the Kubernetes version the kind cluster runs.
+
+```bash
+# my current namespaces
+❯ kubectl get namespaces
+NAME                 STATUS   AGE
+default              Active   179m
+kube-node-lease      Active   3h
+kube-public          Active   3h
+kube-system          Active   3h
+local-path-storage   Active   179m
+user-ladyrassilon    Active   159m
+```
+
+The user namespace exists, but other components are missing, including the ingress controller.
+
+We expect that there will need to be a storage backend and a dummy ingress controller that the charts can run against.
+
+### Other areas
+
+Anything that talks to IAM, S3, Apps, Concourse, or Elasticsearch should not be expected to work; the first two will depend on the [Localstack ticket](https://dsdmoj.atlassian.net/browse/ANPL-858) work, and the others are out of scope for this spike.
+
+## Learnings
+
+### Connecting the cluster to controlpanel
+
+We resolved the networking issues by making the controlpanel containers use the existing `kind` Docker network (the kind network is created first, when the cluster is brought up, and the compose services join it as an external network).
+
+This has been updated in both the dev and main docker-compose files.
+
+To make the kind cluster's config available to the control panel, we used the following command before startup:
+
+```bash
+kind get kubeconfig --internal > ~/.kube/controlpanel
+```
+
+As part of testing we added the `is_kube_connected` API view for diagnostic purposes; it essentially returns "we've connected, and we've got access to the default namespace". This is only active in debug mode, but we should probably pull it out into a proper view for dev and debugging purposes.
+
+## Cleanup
+
+### Several places
+
+There is some commented-out code and config that can be cleaned up.
+
+### docker-compose.dev
+
+The extra-hosts sections can be removed; they were part of an earlier effort to connect the cluster and controlpanel.
+ +On +- [Frontend](https://github.com/ministryofjustice/analytics-platform-control-panel/blob/5a5d0db63c77aea8b8aad419d829f76555a7f4f3/docker-compose.dev.yaml#L23-L24) +- [Migration](https://github.com/ministryofjustice/analytics-platform-control-panel/blob/5a5d0db63c77aea8b8aad419d829f76555a7f4f3/docker-compose.dev.yaml#L29-L30) +- [Worker](https://github.com/ministryofjustice/analytics-platform-control-panel/blob/5a5d0db63c77aea8b8aad419d829f76555a7f4f3/docker-compose.dev.yaml#L37-L38) + +### Dockerfile +As part of our diagnostics and development, we left these lines in the [Dockerfile](https://github.com/ministryofjustice/analytics-platform-control-panel/blob/5a5d0db63c77aea8b8aad419d829f76555a7f4f3/Dockerfile#L56-L65), which will need cleaning up if they're still there. +```Dockerfile +RUN apt-get update +RUN apt-get install -y curl ca-certificates apt-transport-https gnupg +RUN curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - +RUN touch /etc/apt/sources.list.d/kubernetes.list +RUN echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list +RUN apt-get update +RUN apt-get install -y kubectl +RUN apt-get install -y awscli +RUN apt-get install -y iputils-ping +RUN apt install iproute2 -y +``` + diff --git a/doc/images/all-the-tools.png b/doc/images/all-the-tools.png new file mode 100644 index 000000000..394fd1384 Binary files /dev/null and b/doc/images/all-the-tools.png differ diff --git a/doc/images/no-tools-page.png b/doc/images/no-tools-page.png new file mode 100644 index 000000000..40c1ead98 Binary files /dev/null and b/doc/images/no-tools-page.png differ diff --git a/docker-compose.dev.yaml b/docker-compose.dev.yaml index 863cf5675..a753be6b8 100644 --- a/docker-compose.dev.yaml +++ b/docker-compose.dev.yaml @@ -1,6 +1,16 @@ version: "3" services: + frontend_eks: + image: ${REGISTRY}/${REPOSITORY}_eks:${IMAGE_TAG:-latest} + stdin_open: true + tty: true + command: ["python", "manage.py", "runserver", "0.0.0.0:8000"] + volumes: [ + ./controlpanel:/home/controlpanel/controlpanel, + ./tests:/home/controlpanel/tests, + ] + frontend: image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} stdin_open: true @@ -8,15 +18,21 @@ services: command: ["python", "manage.py", "runserver", "0.0.0.0:8000"] volumes: [ ./controlpanel:/home/controlpanel/controlpanel, - ./tests:/home/controlpanel/tests + ./tests:/home/controlpanel/tests, ] + extra_hosts: + - "host.docker.internal:host-gateway" migration: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} + image: ${REGISTRY}/${REPOSITORY}_eks:${IMAGE_TAG:-latest} volumes: [./controlpanel:/home/controlpanel/controlpanel] command: "python manage.py migrate" + extra_hosts: + - "host.docker.internal:host-gateway" worker: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} + image: ${REGISTRY}/${REPOSITORY}_eks:${IMAGE_TAG:-latest} stdin_open: true tty: true volumes: [./controlpanel:/home/controlpanel/controlpanel] command: ["python", "manage.py", "runworker", "background_tasks"] + extra_hosts: + - "host.docker.internal:host-gateway" diff --git a/docker-compose.yaml b/docker-compose.yaml index 90321254f..4a6fad5e0 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,6 +1,7 @@ version: "3" services: db: + # network_mode: host image: "postgres:13.3" environment: POSTGRES_DB: "controlpanel" @@ -12,13 +13,15 @@ services: timeout: 5s retries: 5 redis: + # network_mode: host image: "redis" environment: REDIS_PASSWORD: "controlpanel" command: sh -c "exec redis-server 
--requirepass \"$${REDIS_PASSWORD}\"" migration: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} + # network_mode: host + image: ${REGISTRY}/${REPOSITORY}_eks:${IMAGE_TAG:-latest} depends_on: db: condition: service_healthy @@ -35,7 +38,8 @@ services: command: sh -c "./manage.py migrate" worker: - image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} + # network_mode: host + image: ${REGISTRY}/${REPOSITORY}_eks:${IMAGE_TAG:-latest} depends_on: redis: condition: service_started @@ -63,9 +67,15 @@ services: frontend: image: ${REGISTRY}/${REPOSITORY}:${IMAGE_TAG:-latest} + # network_mode: host build: context: . - ports: ["8000:8000"] + + ports: [ + "8000:8000", + # "63409:63409" + ] + # links: [worker, db, redis] depends_on: worker: condition: service_started @@ -133,13 +143,12 @@ services: iamRole: ${iamRole:-dev_control_panel_api} frontend_eks: + # network_mode: host # Apologies to future devops. Naming is hard. image: ${REGISTRY}/${REPOSITORY}_eks:${IMAGE_TAG:-latest} build: context: . - network: ${NETWORK:-default} dockerfile: Dockerfile_EKS - network_mode: ${NETWORK:-default} ports: ["8000:8000"] depends_on: worker: @@ -208,3 +217,13 @@ services: TOOLS_DOMAIN: tools.dev.mojanalytics.xyz defaultRegion: eu-west-1 iamRole: ${iamRole:-dev_control_panel_api} + +networks: + default: + external: + name: kind + + +# networks: +# sillygemma: +# driver: external \ No newline at end of file
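
A note on the `networks` stanza at the end of docker-compose.yaml above: the compose services now rely on an external Docker network named `kind`, which exists once the kind cluster has been created. A quick check, assuming Docker is installed and `./create-kind.sh` has been run:

```bash
# The kind network should be listed; if it is missing, (re)create the kind cluster first
docker network ls --filter name=kind
```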