From 87229e514af2bfef68334118bab56a848ef20273 Mon Sep 17 00:00:00 2001
From: Emad Rad
Date: Mon, 11 Dec 2023 08:50:09 +0330
Subject: [PATCH] fix: custom Elasticsearch host (#49)

* fix: new K8S_HARMONY_ELASTIC_HOST config added

With this, we can use different Elasticsearch hosts in the plugin.

* chore: cleanup

Markdown warnings were fixed in README.md.
isort and black were applied to the Python files.

* feat: Makefile added

This was added to make sure all Tutor plugins use the same pattern and file structure.

* feat: support for Python 3.11 and 3.12 added

* chore: tutor-mfe link removed

As of the Olive release this is fixed upstream, so the note was removed from the documentation.

* fix: pin Tutor version to Palm

The multi-tenant Elasticsearch prerequisites are not available until the Palm release.

* docs: multi-tenant Elasticsearch only works on Palm and later
---
 README.md                                     | 88 +++++++++++--------
 charts/harmony-chart/values.yaml              |  1 -
 tutor-contrib-harmony-plugin/Makefile         | 34 +++++++
 tutor-contrib-harmony-plugin/setup.py         |  9 +-
 .../tutor_k8s_harmony_plugin/__about__.py     |  2 +-
 .../tutor_k8s_harmony_plugin/commands.py      | 34 +++----
 .../harmony_search/base.py                    |  2 +-
 .../patches/openedx-common-settings           |  4 +-
 .../tutor_k8s_harmony_plugin/plugin.py        | 11 +--
 9 files changed, 117 insertions(+), 68 deletions(-)
 create mode 100644 tutor-contrib-harmony-plugin/Makefile

diff --git a/README.md b/README.md
index aacc81a..26d0e0c 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@
 This project is focused on making it easy to set up a standardized, scalable, secure Kubernetes environment that can host **multiple instances** of [Open edX](https://www.openedx.org). See [Motivation](#motivation) below.
 
 Specifically, this repository contains:
+
 * A Helm Chart that can install necessary shared resources into your cluster (a load balancer / ingress controller, autoscaling infrastructure, monitoring tools, databases, etc.)
 * A [Tutor](https://docs.tutor.overhang.io/) plugin that configures Tutor to build images that will use the shared resources deployed by the Helm chart.
 
@@ -15,10 +16,11 @@ See [technology stack and architecture](#technology-stack-and-architecture) belo
 Many Open edX providers and users have a need to deploy multiple instances of Open edX onto Kubernetes, but there is currently no standardized way to do so and each provider must build their own tooling to manage that. This project aims to provide an easy and standardized approach that incorporates industry best practices and lessons learned.
 
 In particular, this project aims to provide the following benefits to Open edX operators:
+
 * **Ease of use** and **rapid deployment**: This project aims to provide an Open edX hosting environment that just works out of the box, that can be easily upgraded, and that follows best practices for monitoring, security, etc.
 * **Lower costs** by sharing resources where it makes sense. For example, by default Tutor's k8s feature will deploy a separate load balancer and ingress controller for each Open edX instance, instead of a shared ingress controller for all the instances in the cluster. Likewise for MySQL, MongoDB, ElasticSearch, and other resources. By using shared resources by default, costs can be dramatically reduced and operational monitoring and maintenance is greatly simplified.
- - For setups with many small instances, this shared approach provides a huge cost savings with virtually no decrease in performance.
- - For larger instances on the cluster that need dedicated resources, they can easily be configured to do so.
+ * For setups with many small instances, this shared approach provides a huge cost savings with virtually no decrease in performance. + * For larger instances on the cluster that need dedicated resources, they can easily be configured to do so. * **Scalable hosting** for instances of any size. This means for example that the default configuration includes autoscaling of LMS pods to handle increased traffic. * **Flexibility**: this project aims to be "batteries included" and to support setting up all the resources that you need, with useful default configurations, but it is carefully designed so that operators can configure, replace, or disable any components as needed. @@ -61,7 +63,7 @@ In addition, [the cert-manager Helm charts do not install the required CRDs used Tutor does not offer an autoscaling mechanism by default. This is a critical feature when your application starts to receive more and more traffic. Kubernetes offers two main autoscaling methods: -- **Pod-based scaling**: This mechanism consists of the creation and adjustment of new pods to cover growing workloads. +* **Pod-based scaling**: This mechanism consists of the creation and adjustment of new pods to cover growing workloads. Here we can mention tools like [**Horizontal Pod autoscaler (HPA)**](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) and [**Vertical pod autoscaler (VPA)**](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler). @@ -70,7 +72,7 @@ consumption (generally CPU and memory), and the second one aims to stabilize the by providing suggestions on the best configuration for a workload based on historical resource usage measurements. Both of them are meant to be applied over Kubernetes Deployment instances. -- **Node-based scaling:** This mechanism allows the addition of new NODES to the Kubernetes cluster so compute resources +* **Node-based scaling:** This mechanism allows the addition of new NODES to the Kubernetes cluster so compute resources are guaranteed to schedule new incoming workloads. Tools worth mentioning in this category are [**cluster-autoscaler (CA)**](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) and [Karpenter](https://karpenter.sh/). @@ -89,22 +91,23 @@ still present in your cluster. [pod-autoscaling plugin](https://github.com/eduNEXT/tutor-contrib-pod-autoscaling) enables the implementation of HPA and VPA to start scaling an installation workloads. Variables for the plugin configuration are documented there. -#### Node-autoscaling with Karpenter in EKS Clusters. +#### Node-autoscaling with Karpenter in EKS Clusters This section provides a guide on how to install and configure [Karpenter](https://karpenter.sh/) in a EKS cluster. We'll use infrastructure examples included in this repo for such purposes. > Prerequisites: - - An aws accound id - - Kubectl 1.27 - - Terraform 1.5.x or higher - - Helm + +* An aws account id +* Kubectl 1.27 +* Terraform 1.5.x or higher +* Helm 1. Clone this repository and navigate to `./infra-examples/aws`. You'll find Terraform modules for `vpc` and `k8s-cluster` resources. Proceed creating the `vpc` resources first, followed by the `k8s-cluster` resources. Make sure to have the target AWS account ID available, and then execute the following commands on every folder: - ``` + ```sh terraform init terraform plan terraform apply -auto-approve @@ -114,23 +117,23 @@ AWS account ID available, and then execute the following commands on every folde 2. 
Once the `k8s-cluster` is created, run the `terraform output` command on that module and copy the following output variables: - - cluster_name - - karpenter_irsa_role_arn - - karpenter_instance_profile_name + * cluster_name + * karpenter_irsa_role_arn + * karpenter_instance_profile_name These variables will be required in the next steps. 3. Karpenter is a dependency of the harmony chart that can be enabled or disabled. To include Karpenter in the Harmony Chart, **it is crucial** to configure these variables in your `values.yaml` file: - - `karpenter.enabled`: true - - `karpenter.serviceAccount.annotations.eks\.amazonaws\.com/role-arn`: "<`karpenter_irsa_role_arn` value from module>" - - `karpenter.settings.aws.defaultInstanceProfile`: "<`karpenter_instance_profile_name` value from module>" - - `karpenter.settings.aws.clusterName`: "<`cluster_name` value from module>" + * `karpenter.enabled`: true + * `karpenter.serviceAccount.annotations.eks\.amazonaws\.com/role-arn`: "<`karpenter_irsa_role_arn` value from module>" + * `karpenter.settings.aws.defaultInstanceProfile`: "<`karpenter_instance_profile_name` value from module>" + * `karpenter.settings.aws.clusterName`: "<`cluster_name` value from module>" Find below an example of the Karpenter section in the `values.yaml` file: - ``` + ```yaml karpenter: enabled: true serviceAccount: @@ -159,7 +162,6 @@ get further details. 5. To test Karpenter, you can proceed with the instructions included in the [official documentation](https://karpenter.sh/docs/getting-started/getting-started-with-karpenter/#first-use). -
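As a quick sanity check before working through the full getting-started exercise, you can watch the Karpenter controller react to unschedulable pods. The snippet below is a minimal sketch: the `harmony` namespace and the `app.kubernetes.io/name=karpenter` label are assumptions based on a default Harmony chart installation and may differ in your cluster.

```shell
# List the Karpenter controller pods (namespace and label are assumed from a
# default Harmony chart installation; adjust them to match your cluster).
kubectl get pods -n harmony -l app.kubernetes.io/name=karpenter

# Follow the controller logs while you scale a deployment beyond the capacity
# of the existing nodes...
kubectl logs -n harmony -l app.kubernetes.io/name=karpenter -f

# ...and watch new nodes appear (and later get consolidated) as Karpenter acts.
kubectl get nodes --watch
```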


## Usage Instructions @@ -178,24 +180,29 @@ memory** (that's enough to test 2 Open edX instances). with anyone else. For a full configuration reference, see the `charts/harmony-chart/values.yaml` file. 3. Install [Helm](https://helm.sh/) if you don't have it already. 4. Add the Harmony Helm repository: - ``` + + ```shell helm repo add openedx-harmony https://openedx.github.io/openedx-k8s-harmony helm repo update ``` + 5. Install the cert-manager CRDs if using cert-manager: - ``` + + ```shell kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.10.1/cert-manager.crds.yaml --namespace=harmony ``` + You can check the version of cert-manager that is going to be installed by the chart by checking the corresponding line in the `charts/harmony-chart/Chart.yaml` file. 6. Install the Harmony chart by running: - ``` + + ```shell helm install harmony --namespace harmony --create-namespace -f values.yaml openedx-harmony/harmony-chart ``` Note: in the future, if you apply changes to `values.yaml`, please run this command to update the deployment of the chart: -``` +```shell helm upgrade harmony --namespace harmony -f values.yaml openedx-harmony/harmony-chart ``` @@ -208,11 +215,13 @@ HTTPS and is more complicated due to the need to use tunnelling.* 1. First, [install `minikube`](https://minikube.sigs.k8s.io/docs/start/) if you don't have it already. 2. Run `minikube start` (you can also use `minikube dashboard` to access the Kubernetes dashboard). 3. Add the Helm repository and install the Harmony chart using the `values-minikube.yaml` file as configuration: - ``` + + ```shell helm repo add openedx-harmony https://openedx.github.io/openedx-k8s-harmony helm repo update helm install harmony --namespace harmony --create-namespace -f values-minikube.yaml openedx-harmony/harmony-chart ``` + 4. Run `minikube tunnel` (you may need to enter a password), and then you should be able to access the cluster (see "External IP" below). If this approach is not working, an alternative is to run\ `minikube service harmony-ingress-nginx-controller -n harmony`\ @@ -221,14 +230,13 @@ HTTPS and is more complicated due to the need to use tunnelling.* 5. In this case, skip step 2 ("Get the external IP") and use `127.0.0.1` as the external IP. You will need to remember to include the port numbers shown above when accessing the instances. - ### Step 2: Get the external IP The [ingress NGINX Controller](https://kubernetes.github.io/ingress-nginx/) is used to automatically set up an HTTPS reverse proxy for each Open edX instance as it gets deployed onto the cluster. There is just one load balancer with a single external IP for all the instances on the cluster. 
To get its IP, use: -``` +```shell kubectl get svc -n harmony harmony-ingress-nginx-controller ``` @@ -243,13 +251,13 @@ two A records for `lms.example.com` and `*.lms.example.com`, pointing to the ext You also will need to have the tutor-contrib-harmony-plugin installed into Tutor: -``` +```shell pip install -e 'git+https://github.com/openedx/openedx-k8s-harmony.git#egg=tutor-contrib-harmony-plugin&subdirectory=tutor-contrib-harmony-plugin' ``` Next, create a Tutor config directory unique to this instance, and configure it: -``` +```shell export INSTANCE_ID=openedx-01 export TUTOR_ROOT=~/deployments/tutor-k8s/$INSTANCE_ID tutor plugins enable k8s_harmony @@ -258,20 +266,16 @@ tutor config save -i --set K8S_NAMESPACE=$INSTANCE_ID Then deploy it: -``` +```shell tutor k8s start tutor k8s init ``` Note that the `init` command may take quite a long time to complete. Use the commands that Tutor says ("To view the logs -from this job, run:") in a separate terminal in order to monitor the status. Also note that if you want to use the MFEs, -[you'll need a custom image](https://github.com/overhangio/tutor-mfe/#running-mfes-on-kubernetes) and it won't work out -of the box. +from this job, run:") in a separate terminal in order to monitor the status. **You can repeat step 3 many times to install multiple instances onto the cluster.** - -
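For example, a hypothetical second instance would get its own Tutor root, Kubernetes namespace and hostnames. Everything below is a placeholder value; `LMS_HOST` and `CMS_HOST` are standard Tutor settings:

```shell
# Sketch of a second instance -- all names and hostnames here are examples.
export INSTANCE_ID=openedx-02
export TUTOR_ROOT=~/deployments/tutor-k8s/$INSTANCE_ID
tutor plugins enable k8s_harmony
tutor config save -i \
  --set K8S_NAMESPACE=$INSTANCE_ID \
  --set LMS_HOST=lms2.example.com \
  --set CMS_HOST=studio.lms2.example.com
tutor k8s start
tutor k8s init
```

Each instance is isolated in its own namespace, while the ingress controller and the other shared resources installed by the Harmony chart are reused.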


## Configuration Reference @@ -281,6 +285,8 @@ of the box. Tutor creates an Elasticsearch pod as part of the Kubernetes deployment. Depending on the number of instances Memory and CPU use can be lowered by running a central ES cluster instead of an ES pod for every instance. +**Please note that this will only work for "Palm" version and later.** + To enable set `elasticsearch.enabled=true` in your `values.yaml` and deploy the chart. For each instance you would like to enable this on, set the configuration values in the respective `config.yml`: @@ -290,9 +296,9 @@ K8S_HARMONY_ENABLE_SHARED_HARMONY_SEARCH: true RUN_ELASTICSEARCH: false ``` -- And create the user on the cluster with `tutor k8s harmony create-elasticsearch-user`. -- Rebuild your Open edX image `tutor images build openedx`. -- Finally, redeploy your changes: `tutor k8s start && tutor k8s init`. +* And create the user on the cluster with `tutor k8s harmony create-elasticsearch-user`. +* Rebuild your Open edX image `tutor images build openedx`. +* Finally, redeploy your changes: `tutor k8s start && tutor k8s init`. #### Caveats @@ -308,18 +314,22 @@ Just run `helm uninstall --namespace harmony harmony` to uninstall this. If you use DigitalOcean, you can use Terraform to quickly spin up a cluster, try this out, then shut it down again. Here's how. First, put the following into `infra-examples/secrets.auto.tfvars` including a valid DigitalOcean access token: -``` + +```conf cluster_name = "harmony-test" do_token = "digital-ocean-token" ``` + Then run: -``` + +```sh cd infra-examples/digitalocean terraform init terraform apply cd .. export KUBECONFIG=`pwd`/infra-examples/kubeconfig ``` + Then follow steps 1-4 above. When you're done, run `terraform destroy` to clean up everything. diff --git a/charts/harmony-chart/values.yaml b/charts/harmony-chart/values.yaml index d8e1418..d52ba8b 100644 --- a/charts/harmony-chart/values.yaml +++ b/charts/harmony-chart/values.yaml @@ -82,7 +82,6 @@ vpa: admissionController: replicaCount: 1 - # Multi-tenant OpenSearch opensearch: enabled: false diff --git a/tutor-contrib-harmony-plugin/Makefile b/tutor-contrib-harmony-plugin/Makefile new file mode 100644 index 0000000..b53c880 --- /dev/null +++ b/tutor-contrib-harmony-plugin/Makefile @@ -0,0 +1,34 @@ +.DEFAULT_GOAL := help +.PHONY: docs +SRC_DIRS = ./tutor_k8s_harmony_plugin +BLACK_OPTS = --exclude templates ${SRC_DIRS} + +# Warning: These checks are not necessarily run on every PR. +test: test-lint test-types test-format # Run some static checks. + +test-format: ## Run code formatting tests + black --check --diff $(BLACK_OPTS) + +test-lint: ## Run code linting tests + pylint --errors-only --enable=unused-import,unused-argument --ignore=templates --ignore=docs/_ext ${SRC_DIRS} + +test-types: ## Run type checks. + mypy --exclude=templates --ignore-missing-imports --implicit-reexport --strict ${SRC_DIRS} + +format: ## Format code automatically + black $(BLACK_OPTS) + +isort: ## Sort imports. This target is not mandatory because the output may be incompatible with black formatting. Provided for convenience purposes. + isort --skip=templates ${SRC_DIRS} + +changelog-entry: ## Create a new changelog entry. + scriv create + +changelog: ## Collect changelog entries in the CHANGELOG.md file. 
+ scriv collect + +ESCAPE =  +help: ## Print this help + @grep -E '^([a-zA-Z_-]+:.*?## .*|######* .+)$$' Makefile \ + | sed 's/######* \(.*\)/@ $(ESCAPE)[1;31m\1$(ESCAPE)[0m/g' | tr '@' '\n' \ + | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' diff --git a/tutor-contrib-harmony-plugin/setup.py b/tutor-contrib-harmony-plugin/setup.py index 289a860..415c984 100644 --- a/tutor-contrib-harmony-plugin/setup.py +++ b/tutor-contrib-harmony-plugin/setup.py @@ -1,6 +1,7 @@ import io import os -from setuptools import setup, find_packages + +from setuptools import find_packages, setup HERE = os.path.abspath(os.path.dirname(__file__)) @@ -39,10 +40,10 @@ def load_about(): packages=find_packages(exclude=["tests*"]), include_package_data=True, python_requires=">=3.7", - install_requires=["tutor"], + install_requires=["tutor>=16.0.0,<17.0.0"], entry_points={ "tutor.plugin.v1": [ - "k8s_harmony = tutor_k8s_harmony_plugin.plugin" + "k8s_harmony = tutor_k8s_harmony_plugin.plugin", ] }, classifiers=[ @@ -55,5 +56,7 @@ def load_about(): "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], ) diff --git a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/__about__.py b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/__about__.py index a68927d..3dc1f76 100644 --- a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/__about__.py +++ b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/__about__.py @@ -1 +1 @@ -__version__ = "0.1.0" \ No newline at end of file +__version__ = "0.1.0" diff --git a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/commands.py b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/commands.py index 047c4bf..1ad23e4 100644 --- a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/commands.py +++ b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/commands.py @@ -1,12 +1,11 @@ -import os - import click from tutor import config as tutor_config -from tutor import env as tutor_env -from tutor.commands.k8s import K8sContext, kubectl_exec +from tutor.commands.k8s import K8sContext + from .harmony_search.elasticsearch import ElasticSearchAPI from .harmony_search.opensearch import OpenSearchAPI + @click.group(help="Commands and subcommands of the openedx-k8s-harmony.") @click.pass_context def harmony(context: click.Context) -> None: @@ -41,6 +40,7 @@ def create_elasticsearch_user(context: click.Context): }, ) + @click.command(help="Create or update Opensearch users") @click.pass_obj def create_opensearch_user(context: click.Context): @@ -56,19 +56,21 @@ def create_opensearch_user(context: click.Context): prefix = config["HARMONY_SEARCH_INDEX_PREFIX"] api.put( f"_plugins/_security/api/roles/{role_name}", - {"index_permissions": [{ - "index_patterns": [ - f"{prefix}*" - ], - "allowed_actions": [ - "read", - "write", - "create_index", - "manage", - "manage_ilm", - "all" + { + "index_permissions": [ + { + "index_patterns": [f"{prefix}*"], + "allowed_actions": [ + "read", + "write", + "create_index", + "manage", + "manage_ilm", + "all", + ], + } ] - }]}, + }, ) api.put( diff --git a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/harmony_search/base.py b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/harmony_search/base.py index 57c3bca..be8d95e 100644 --- a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/harmony_search/base.py +++ 
b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/harmony_search/base.py @@ -69,7 +69,7 @@ def post(self, endpoint: str, data: dict) -> typing.Union[dict, bytes]: '"Content-Type: application/json"', ] ) - + def put(self, endpoint: str, data: dict) -> typing.Union[dict, bytes]: """ Runs a PUT request on the HarmonySearch cluster with the specified diff --git a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/patches/openedx-common-settings b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/patches/openedx-common-settings index 5c6e4a4..aeee87a 100644 --- a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/patches/openedx-common-settings +++ b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/patches/openedx-common-settings @@ -1,8 +1,8 @@ {% if K8S_HARMONY_ENABLE_SHARED_HARMONY_SEARCH %} -ELASTIC_SEARCH_INDEX_PREFIX = "{{HARMONY_SEARCH_INDEX_PREFIX}}" +ELASTIC_SEARCH_INDEX_PREFIX = "{{ HARMONY_SEARCH_INDEX_PREFIX }}" ELASTIC_SEARCH_CONFIG = [{ "use_ssl": True, - "host": "harmony-search-cluster.{{K8S_HARMONY_NAMESPACE}}.svc.cluster.local", + "host": "{{ K8S_HARMONY_ELASTIC_HOST }}", "verify_certs": False, "port": 9200, "http_auth": "{{ HARMONY_SEARCH_HTTP_AUTH }}" diff --git a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/plugin.py b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/plugin.py index 9de94ca..4d890d6 100644 --- a/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/plugin.py +++ b/tutor-contrib-harmony-plugin/tutor_k8s_harmony_plugin/plugin.py @@ -1,15 +1,16 @@ -from glob import glob import os -import pkg_resources +from glob import glob +import pkg_resources from tutor import hooks -from . import commands +from . import commands from .__about__ import __version__ config = { "defaults": { "VERSION": __version__, + "ELASTIC_HOST": "harmony-search-cluster.{{ K8S_HARMONY_NAMESPACE }}.svc.cluster.local", # This plugin assumes you are using ingress-nginx as an ingress controller to provide # you with a central load balancer. The standard Ingress object uses annotations to # trigger the generation of certificates using cert-manager. @@ -31,8 +32,8 @@ "ENABLE_HTTPS": True, }, "unique": { - "HARMONY_SEARCH_HTTP_AUTH": "{{K8S_NAMESPACE}}:{{ 24|random_string }}", - "HARMONY_SEARCH_INDEX_PREFIX": "{{K8S_NAMESPACE}}-{{ 4|random_string|lower }}-", + "HARMONY_SEARCH_HTTP_AUTH": "{{ K8S_NAMESPACE }}:{{ 24|random_string }}", + "HARMONY_SEARCH_INDEX_PREFIX": "{{ K8S_NAMESPACE }}-{{ 4|random_string|lower }}-", }, }
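For a concrete picture of what this change enables: because the Elasticsearch host is now read from `K8S_HARMONY_ELASTIC_HOST` (defaulting to the in-cluster `harmony-search-cluster` service), an instance can be pointed at a different Elasticsearch host by overriding that value before rebuilding and redeploying. A minimal sketch, using a hypothetical hostname:

```shell
# Override the Elasticsearch host for this instance (example hostname).
tutor config save --set K8S_HARMONY_ELASTIC_HOST=search.example.com

# Rebuild the Open edX image and redeploy so the setting takes effect.
tutor images build openedx
tutor k8s start && tutor k8s init
```

Note that the rendered `openedx-common-settings` patch still assumes HTTPS on port 9200 and the generated `HARMONY_SEARCH_HTTP_AUTH` credentials, so the custom host has to be reachable on those terms.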