Skip to content
This repository has been archived by the owner on Oct 3, 2020. It is now read-only.

Commit

Permalink
Unused PVCs: check references by CronJobs/Jobs (#66)
Browse files Browse the repository at this point in the history
* use JMESpath for finding pod volume references

* #65 add test for CronJob

* check PVC references by CronJob/Job

* refactor into function
  • Loading branch information
hjacobs authored Mar 24, 2020
1 parent 820b6f1 commit 7f7f7b4
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 20 deletions.
68 changes: 49 additions & 19 deletions kube_janitor/resource_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,25 @@
from typing import Dict
from typing import Optional

import jmespath
from pykube import HTTPClient
from pykube.objects import APIObject
from pykube.objects import CronJob
from pykube.objects import Job
from pykube.objects import NamespacedAPIObject
from pykube.objects import Pod
from pykube.objects import StatefulSet

logger = logging.getLogger(__name__)

# Per resource class: compiled JMESpath expression selecting the
# "persistentVolumeClaim" entries of the volumes declared in that kind's
# manifest. Pods hold the volumes directly under "spec"; Jobs nest them under
# "spec.template" and CronJobs under "spec.jobTemplate.spec.template".
# Insertion order (Pod, Job, CronJob) is the order in which the mapping iterates.
PVC_REFERENCES = {
    clazz: jmespath.compile(expression)
    for clazz, expression in (
        (Pod, "spec.volumes[].persistentVolumeClaim"),
        (Job, "spec.template.spec.volumes[].persistentVolumeClaim"),
        (
            CronJob,
            "spec.jobTemplate.spec.template.spec.volumes[].persistentVolumeClaim",
        ),
    )
}


def get_objects_in_namespace(
clazz, api: HTTPClient, namespace: str, cache: Dict[str, Any]
Expand All @@ -27,37 +38,56 @@ def get_objects_in_namespace(
return objects


def is_pvc_referenced_by_object(
    pvc: NamespacedAPIObject, obj: NamespacedAPIObject, claim_path
) -> bool:
    """Tell whether `obj` uses the given PVC as one of its volume claims.

    `claim_path` is the compiled JMESpath expression used to locate the volume
    claims: its `search()` is run over the object's raw manifest (`obj.obj`)
    and the PVC counts as referenced as soon as one of the returned claims has
    a "claimName" equal to the PVC's name.
    """
    # a falsy search result (e.g. nothing found) is treated as "no claims"
    found_claims = claim_path.search(obj.obj) or []
    return any(entry.get("claimName") == pvc.name for entry in found_claims)


def get_persistent_volume_claim_context(
pvc: NamespacedAPIObject, cache: Dict[str, Any]
):
) -> Dict[str, Any]:
"""Get context for PersistentVolumeClaim: whether it's mounted by a Pod and whether it's referenced by a StatefulSet."""
pvc_is_mounted = False
pvc_is_referenced = False

# find out whether a Pod mounts the PVC
for pod in get_objects_in_namespace(Pod, pvc.api, pvc.namespace, cache):
for volume in pod.obj.get("spec", {}).get("volumes", []):
if "persistentVolumeClaim" in volume:
if volume["persistentVolumeClaim"].get("claimName") == pvc.name:
logger.debug(
f"{pvc.kind} {pvc.namespace}/{pvc.name} is mounted by {pod.kind} {pod.name}"
)
for clazz, claim_path in PVC_REFERENCES.items():
if pvc_is_referenced:
break
# find out whether the PVC is still mounted by a Pod or referenced by some object
for obj in get_objects_in_namespace(clazz, pvc.api, pvc.namespace, cache):
if is_pvc_referenced_by_object(pvc, obj, claim_path):
if clazz is Pod:
verb = "mounted"
pvc_is_mounted = True
break

# find out whether the PVC is still referenced somewhere
for sts in get_objects_in_namespace(StatefulSet, pvc.api, pvc.namespace, cache):
# see https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/
for claim_template in sts.obj.get("spec", {}).get("volumeClaimTemplates", []):
claim_prefix = claim_template.get("metadata", {}).get("name")
claim_name_pattern = re.compile(f"^{claim_prefix}-{sts.name}-[0-9]+$")
if claim_name_pattern.match(pvc.name):
else:
verb = "referenced"
logger.debug(
f"{pvc.kind} {pvc.namespace}/{pvc.name} is referenced by {sts.kind} {sts.name}"
f"{pvc.kind} {pvc.namespace}/{pvc.name} is {verb} by {obj.kind} {obj.name}"
)
pvc_is_referenced = True
break

if not pvc_is_referenced:
# find out whether the PVC is still referenced by a StatefulSet
for sts in get_objects_in_namespace(StatefulSet, pvc.api, pvc.namespace, cache):
# see https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/
for claim_template in sts.obj.get("spec", {}).get(
"volumeClaimTemplates", []
):
claim_prefix = claim_template.get("metadata", {}).get("name")
claim_name_pattern = re.compile(f"^{claim_prefix}-{sts.name}-[0-9]+$")
if claim_name_pattern.match(pvc.name):
logger.debug(
f"{pvc.kind} {pvc.namespace}/{pvc.name} is referenced by {sts.kind} {sts.name}"
)
pvc_is_referenced = True
break

# negate the property to make it less error-prone for JMESpath usage
return {
"pvc_is_not_mounted": not pvc_is_mounted,
Expand Down
92 changes: 91 additions & 1 deletion tests/test_resource_context.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,61 @@
from unittest.mock import MagicMock

import yaml
from pykube.objects import Namespace
from pykube.objects import PersistentVolumeClaim

import kube_janitor.example_hooks
from kube_janitor.resource_context import get_resource_context

# Job manifest fixture: the pod template declares one volume backed by the
# PersistentVolumeClaim "job-data" and mounts it at /data in its container.
# The nesting matters: the claim has to sit under spec.template.spec.volumes.
# The volume is named "job-data" so that the container's volumeMounts entry
# refers to a volume that is actually declared.
JOB_WITH_VOLUME = """
apiVersion: batch/v1
kind: Job
metadata:
  name: pi
spec:
  template:
    spec:
      containers:
        - name: pi
          image: my-image
          volumeMounts:
            - mountPath: "/data"
              name: "job-data"
      volumes:
        - name: "job-data"
          persistentVolumeClaim:
            claimName: "job-data"
"""

# CronJob manifest fixture: the pod template inside the job template declares
# one volume backed by the PersistentVolumeClaim "foobar-data" and mounts it
# at /data. The nesting matters: the claim has to sit under
# spec.jobTemplate.spec.template.spec.volumes.
CRONJOB_WITH_VOLUME = """
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: "foobar"
spec:
  schedule: "0 23 * * *"
  concurrencyPolicy: Forbid
  failedJobsHistoryLimit: 1
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            application: "foobar"
        spec:
          restartPolicy: Never
          containers:
            - name: cont
              image: "my-docker-image"
              volumeMounts:
                - mountPath: "/data"
                  name: "foobar-data"
          volumes:
            - name: "foobar-data"
              persistentVolumeClaim:
                claimName: "foobar-data"
"""


def test_pvc_not_mounted():
api_mock = MagicMock(name="APIMock")
Expand Down Expand Up @@ -58,7 +108,7 @@ def get(**kwargs):
assert not context["pvc_is_not_mounted"]


def test_pvc_is_referenced():
def test_pvc_is_referenced_by_statefulset():
api_mock = MagicMock(name="APIMock")

def get(**kwargs):
Expand Down Expand Up @@ -87,6 +137,46 @@ def get(**kwargs):
assert not context["pvc_is_not_referenced"]


def test_pvc_is_referenced_by_cronjob():
    """A PVC named in a CronJob's volumes must not be reported as unreferenced."""

    def get(**kwargs):
        # only the "cronjobs" listing returns an item; every other URL is empty
        if kwargs.get("url") != "cronjobs":
            payload = {}
        else:
            payload = {"items": [yaml.safe_load(CRONJOB_WITH_VOLUME)]}
        fake_response = MagicMock()
        fake_response.json.return_value = payload
        return fake_response

    api_mock = MagicMock(name="APIMock")
    api_mock.get = get

    claim = PersistentVolumeClaim(api_mock, {"metadata": {"name": "foobar-data"}})

    assert not get_resource_context(claim)["pvc_is_not_referenced"]


def test_pvc_is_referenced_by_job():
    """A PVC named in a Job's volumes must not be reported as unreferenced."""

    def get(**kwargs):
        # only the "jobs" listing returns an item; every other URL is empty
        if kwargs.get("url") != "jobs":
            payload = {}
        else:
            payload = {"items": [yaml.safe_load(JOB_WITH_VOLUME)]}
        fake_response = MagicMock()
        fake_response.json.return_value = payload
        return fake_response

    api_mock = MagicMock(name="APIMock")
    api_mock.get = get

    claim = PersistentVolumeClaim(api_mock, {"metadata": {"name": "job-data"}})

    assert not get_resource_context(claim)["pvc_is_not_referenced"]


def test_example_hook():
namespace = Namespace(None, {"metadata": {"name": "my-ns"}})
hook = kube_janitor.example_hooks.random_dice
Expand Down

0 comments on commit 7f7f7b4

Please sign in to comment.