diff --git a/.github/workflows/build_container_image.yaml b/.github/workflows/build_container_image.yaml
index 53a4a285a73d..dbff3546e9ae 100644
--- a/.github/workflows/build_container_image.yaml
+++ b/.github/workflows/build_container_image.yaml
@@ -10,8 +10,67 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 jobs:
+  ghcrbuild:
+    name: Build container image for GHCR
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # https://stackoverflow.com/questions/59810838/how-to-get-the-short-sha-for-the-github-workflow
+      - name: Set outputs
+        id: commit
+        run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
+      - name: Set branch name
+        id: branch
+        run: |
+          if [[ "$GITHUB_REF" == "refs/tags/"* ]]; then
+            echo "name=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
+          elif [[ "$GITHUB_REF" == "refs/heads/dev" ]]; then
+            echo "name=dev" >> $GITHUB_OUTPUT
+          elif [[ "$GITHUB_REF" == "refs/heads/release_"* ]]; then
+            echo "name=${GITHUB_REF#refs/heads/release_}-auto" >> $GITHUB_OUTPUT
+          fi
+        shell: bash
+      - name: Extract metadata for container image
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ghcr.io/${{ github.repository }}
+          tags: |
+            type=raw,value=${{steps.branch.outputs.name}}
+      - name: Build args
+        id: buildargs
+        run: |
+          echo "gitcommit=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
+          echo "builddate=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> $GITHUB_OUTPUT
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          platforms: linux/amd64
+
+      - name: Login to GHCR
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push container image to ghcr
+        uses: docker/build-push-action@v4
+        with:
+          build-args: |
+            GIT_COMMIT=${{ steps.buildargs.outputs.gitcommit }}
+            BUILD_DATE=${{ steps.buildargs.outputs.builddate }}
+            IMAGE_TAG=${{ steps.branch.outputs.name }}
+          file: .k8s_ci.Dockerfile
+          push: true
+          context: .
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          platforms: linux/amd64
+
   build:
-    name: Build container image
+    name: Build container image for Galaxy repos
     runs-on: ubuntu-latest
     if: github.repository_owner == 'galaxyproject'
     steps:
@@ -58,3 +117,4 @@ jobs:
         uses: actions-hub/docker@master
         with:
           args: push galaxy/galaxy-min:${{ steps.branch.outputs.name }}
+
diff --git a/lib/galaxy/dependencies/conditional-requirements.txt b/lib/galaxy/dependencies/conditional-requirements.txt
index abebd8fd0a13..d59bd0916e69 100644
--- a/lib/galaxy/dependencies/conditional-requirements.txt
+++ b/lib/galaxy/dependencies/conditional-requirements.txt
@@ -36,7 +36,7 @@ custos-sdk
 chronos-python==1.2.1
 
 # Kubernetes job runner
-pykube-ng==21.3.0
+pykube-ng==23.6.0
 
 # Synnefo / Pithos+ object store client
 kamaki
diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py
index 56fef20a0ecb..ce94e12e5673 100644
--- a/lib/galaxy/jobs/runners/kubernetes.py
+++ b/lib/galaxy/jobs/runners/kubernetes.py
@@ -20,6 +20,7 @@
 )
 from galaxy.jobs.runners.util.pykube_util import (
     deduplicate_entries,
+    DEFAULT_INGRESS_API_VERSION,
     DEFAULT_JOB_API_VERSION,
     delete_ingress,
     delete_job,
@@ -34,6 +35,7 @@
     HTTPError,
     Ingress,
     ingress_object_dict,
+    is_pod_running,
     is_pod_unschedulable,
     Job,
     job_object_dict,
@@ -109,6 +111,8 @@ def __init__(self, app, nworkers, **kwargs):
             k8s_interactivetools_use_ssl=dict(map=bool, default=False),
             k8s_interactivetools_ingress_annotations=dict(map=str),
             k8s_interactivetools_ingress_class=dict(map=str, default=None),
+            k8s_interactivetools_tls_secret=dict(map=str, default=None),
+            k8s_ingress_api_version=dict(map=str, default=DEFAULT_INGRESS_API_VERSION),
         )
 
         if "runner_param_specs" not in kwargs:
@@ -249,6 +253,7 @@ def __configure_port_routing(self, ajs):
         service = Service(self._pykube_api, k8s_service_obj)
         service.create()
         ingress = Ingress(self._pykube_api, k8s_ingress_obj)
+        ingress.version = self.runner_params["k8s_ingress_api_version"]
         ingress.create()
 
     def __get_overridable_params(self, job_wrapper, param_key):
@@ -429,6 +434,54 @@ def __get_k8s_service_spec(self, ajs):
         }
         return k8s_spec_template
 
+    def __get_k8s_ingress_rules_spec(self, ajs, entry_points):
+        """This represents the template for the "rules" portion of the Ingress spec."""
+        if "v1beta1" in self.runner_params["k8s_ingress_api_version"]:
+            rules_spec = [
+                {
+                    "host": ep["domain"],
+                    "http": {
+                        "paths": [
+                            {
+                                "backend": {
+                                    "serviceName": self.__get_k8s_job_name(
+                                        self.__produce_k8s_job_prefix(), ajs.job_wrapper
+                                    ),
+                                    "servicePort": int(ep["tool_port"]),
+                                },
+                                "path": ep.get("entry_path", "/"),
+                                "pathType": "Prefix",
+                            }
+                        ]
+                    },
+                }
+                for ep in entry_points
+            ]
+        else:
+            rules_spec = [
+                {
+                    "host": ep["domain"],
+                    "http": {
+                        "paths": [
+                            {
+                                "backend": {
+                                    "service": {
+                                        "name": self.__get_k8s_job_name(
+                                            self.__produce_k8s_job_prefix(), ajs.job_wrapper
+                                        ),
+                                        "port": {"number": int(ep["tool_port"])},
+                                    }
+                                },
+                                "path": ep.get("entry_path", "/"),
+                                "pathType": "ImplementationSpecific",
+                            }
+                        ]
+                    },
+                }
+                for ep in entry_points
+            ]
+        return rules_spec
+
     def __get_k8s_ingress_spec(self, ajs):
         """The k8s spec template is nothing but a Ingress spec, except that it is nested and does not have
         an apiversion nor kind."""
@@ -466,39 +519,22 @@
                 },
                 "annotations": {"app.galaxyproject.org/tool_id": ajs.job_wrapper.tool.id},
             },
-            "spec": {
-                "rules": [
-                    {
-                        "host": ep["domain"],
-                        "http": {
-                            "paths": [
-                                {
-                                    "backend": {
"service": { - "name": self.__get_k8s_job_name( - self.__produce_k8s_job_prefix(), ajs.job_wrapper - ), - "port": {"number": int(ep["tool_port"])}, - } - }, - "path": ep.get("entry_path", "/"), - "pathType": "Prefix", - } - ] - }, - } - for ep in entry_points - ], - }, + "spec": {"rules": self.__get_k8s_ingress_rules_spec(ajs, entry_points)}, } default_ingress_class = self.runner_params.get("k8s_interactivetools_ingress_class") if default_ingress_class: k8s_spec_template["spec"]["ingressClassName"] = default_ingress_class if self.runner_params.get("k8s_interactivetools_use_ssl"): domains = list({e["domain"] for e in entry_points}) - k8s_spec_template["spec"]["tls"] = [ - {"hosts": [domain], "secretName": re.sub("[^a-z0-9-]", "-", domain)} for domain in domains - ] + override_secret = self.runner_params.get("k8s_interactivetools_tls_secret") + if override_secret: + k8s_spec_template["spec"]["tls"] = [ + {"hosts": [domain], "secretName": override_secret} for domain in domains + ] + else: + k8s_spec_template["spec"]["tls"] = [ + {"hosts": [domain], "secretName": re.sub("[^a-z0-9-]", "-", domain)} for domain in domains + ] if self.runner_params.get("k8s_interactivetools_ingress_annotations"): new_ann = yaml.safe_load(self.runner_params.get("k8s_interactivetools_ingress_annotations")) k8s_spec_template["metadata"]["annotations"].update(new_ann) @@ -767,10 +803,14 @@ def check_watched_item(self, job_state): pass else: pass - else: + elif self.__check_job_pod_running(job_state): log.debug("Job set to running...") job_state.running = True job_state.job_wrapper.change_state(model.Job.states.RUNNING) + else: + log.debug( + f"Job id: {job_state.job_id} with k8s id: {job.name} scheduled and is waiting to start..." + ) return job_state elif job_persisted_state == model.Job.states.DELETED: # Job has been deleted via stop_job and job has not been deleted, @@ -921,6 +961,17 @@ def __job_failed_due_to_low_memory(self, job_state): return False + def __check_job_pod_running(self, job_state): + """ + checks the state of the pod to see if it is running. + """ + pods = find_pod_object_by_name(self._pykube_api, job_state.job_id, self.runner_params["k8s_namespace"]) + if not pods.response["items"]: + return False + + pod = Pod(self._pykube_api, pods.response["items"][0]) + return is_pod_running(self._pykube_api, pod, self.runner_params["k8s_namespace"]) + def __job_pending_due_to_unschedulable_pod(self, job_state): """ checks the state of the pod to see if it is unschedulable. 
diff --git a/lib/galaxy/jobs/runners/util/pykube_util.py b/lib/galaxy/jobs/runners/util/pykube_util.py
index 6fc2c039f741..b6122dd8aa00 100644
--- a/lib/galaxy/jobs/runners/util/pykube_util.py
+++ b/lib/galaxy/jobs/runners/util/pykube_util.py
@@ -94,6 +94,13 @@ def find_pod_object_by_name(pykube_api, job_name, namespace=None):
     return Pod.objects(pykube_api).filter(selector=f"job-name={job_name}", namespace=namespace)
 
 
+def is_pod_running(pykube_api, pod, namespace=None):
+    if pod.obj["status"].get("phase") == "Running":
+        return True
+
+    return False
+
+
 def is_pod_unschedulable(pykube_api, pod, namespace=None):
     is_unschedulable = any(c.get("reason") == "Unschedulable" for c in pod.obj["status"].get("conditions", []))
     if pod.obj["status"].get("phase") == "Pending" and is_unschedulable:
@@ -311,6 +318,7 @@ def galaxy_instance_id(params):
     "find_pod_object_by_name",
     "galaxy_instance_id",
     "HTTPError",
+    "is_pod_running",
     "is_pod_unschedulable",
     "Job",
     "Service",
diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
index 75599f69eed9..d423c65e1216 100644
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -2917,8 +2917,7 @@ def __init__(
     @property
     def active(self):
         if self.configured and not self.deleted:
-            # FIXME: don't included queued?
-            return not self.job.finished
+            return self.job.running
         return False
 
     @property
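The pod-phase check added in pykube_util.py and wired into check_watched_item via __check_job_pod_running keeps a Galaxy job out of the running state until the pod backing its Kubernetes Job actually reaches the "Running" phase. A minimal usage sketch with pykube-ng, assuming a reachable kubeconfig or in-cluster config and example job name and namespace values:

from pykube import HTTPClient, KubeConfig, Pod


def job_pod_is_running(api, job_name, namespace=None):
    # Look up the pod created for a Kubernetes Job via its job-name label and
    # report whether its phase is "Running" (same logic as is_pod_running).
    pods = Pod.objects(api).filter(selector=f"job-name={job_name}", namespace=namespace)
    items = pods.response["items"]
    if not items:
        return False  # no pod scheduled for this job yet
    pod = Pod(api, items[0])
    return pod.obj["status"].get("phase") == "Running"


api = HTTPClient(KubeConfig.from_env())  # assumption: kubeconfig/in-cluster config is available
print(job_pod_is_running(api, "gxy-abc123", namespace="galaxy"))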