"""Kubernetes module."""
import logging
import time
from http import HTTPStatus
from pathlib import Path
from typing import Any, Optional
import kubernetes  # type: ignore
from kubernetes import client, config  # type: ignore
from kubernetes.client.models import V1Taint
from spicerack.decorators import retry
from spicerack.exceptions import SpicerackCheckError, SpicerackError
logger = logging.getLogger(__name__)
class KubernetesApiError(SpicerackError):
"""Custom error class for errors interacting with the kubernetes api."""
class KubernetesApiTooManyRequests(KubernetesApiError):
"""Custom error class for HTTP TooManyRequest errors when interacting with the kubernetes api."""
class KubernetesError(SpicerackError):
"""Custom error class for errors in running the kubernetes module."""
class KubernetesCheckError(SpicerackCheckError):
"""Custom error class for errors checking kubernetes resources."""
class Kubernetes:
"""High-level interface for interacting with the kubernetes api from spicerack."""
def __init__(self, group: str, cluster: str, *, dry_run: bool = True):
"""Initialize the instance.
Arguments:
group: the cluster group we want to operate on.
cluster: the cluster we're operating on.
dry_run: if true, no write operations will happen.
"""
self.group = group
self.api = KubernetesApiFactory(cluster)
self.dry_run = dry_run
def get_node(self, name: str) -> "KubernetesNode":
"""Get a kubernetes node.
Arguments:
name: the name of the node.
Raises:
            spicerack.k8s.KubernetesError: if the node is not found on the cluster.
            spicerack.k8s.KubernetesApiError: if the call to the api failed.
"""
return KubernetesNode(name, self.api, self.dry_run)
def get_pod(self, namespace: str, name: str) -> "KubernetesPod":
"""Get a kubernetes pod.
Arguments:
            namespace: the namespace the pod is in.
            name: the name of the pod.
Raises:
spicerack.k8s.KubernetesApiError: if the pod is not found on the cluster.
"""
return KubernetesPod(namespace, name, self.api, self.dry_run)
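# A minimal usage sketch of the high-level interface above (the group, cluster,
# node and pod names are illustrative, not taken from any real configuration):
#
#     k8s = Kubernetes("main", "staging", dry_run=True)
#     node = k8s.get_node("kubernetes1001.example.org")
#     pod = k8s.get_pod("my-namespace", "my-pod-5d8c7f9b4-x2x7z")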
class KubernetesApiFactory:
"""Provides kubernetes object classes easy access to the API."""
API_CLASSES: dict[str, Any] = {"core": client.CoreV1Api, "batch": client.BatchV1Api}
"""The different kubernetes APIs supported."""
CONFIG_BASE: str = "/etc/kubernetes"
"""The base path for the kubernetes clusters configurations files."""
def __init__(self, cluster: str):
"""Initialize the instance.
Arguments:
cluster: the cluster we're operating on.
"""
self.cluster = cluster
self._configurations: dict[str, client.Configuration] = {}
def configuration(self, user: str) -> client.Configuration:
"""Get the configuration for a specific user.
Arguments:
user: the user to fetch the configuration for.
Raises:
spicerack.k8s.KubernetesError: if the user or the configuration are invalid.
"""
# Check the user doesn't contain path separators
if len(Path(user).parts) != 1:
raise KubernetesError(f"User '{user}' is not valid")
if user not in self._configurations:
self._configurations[user] = client.Configuration()
try:
config.load_kube_config(
config_file=str(self._config_file_path(user)), client_configuration=self._configurations[user]
)
except kubernetes.config.config_exception.ConfigException as e:
raise KubernetesError(e) from e
return self._configurations[user]
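    # A worked example of the validation above: Path("admin").parts == ("admin",)
    # and is accepted, while Path("../root").parts == ("..", "root") and is
    # rejected, so a user name can never traverse outside CONFIG_BASE.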
def core(self, *, user: str = "admin") -> kubernetes.client.CoreV1Api:
"""Return an instance of the core api correctly configured.
Arguments:
user: the user to use for authentication.
"""
conf = self.configuration(user)
return self.API_CLASSES["core"](client.ApiClient(configuration=conf))
def batch(self, *, user: str = "admin") -> kubernetes.client.BatchV1Api:
"""Return an instance of the batch api correctly configured.
Arguments:
user: the user to use for authentication.
"""
conf = self.configuration(user)
return self.API_CLASSES["batch"](client.ApiClient(configuration=conf))
def _config_file_path(self, user: str) -> Path:
"""Returns the path on the configuration file for the given cluster and user."""
return Path(self.CONFIG_BASE) / f"{user}-{self.cluster}.config"
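    # For example, assuming cluster "staging", _config_file_path("admin") resolves
    # to /etc/kubernetes/admin-staging.config: one kubeconfig per user/cluster pair.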
class KubernetesNode:
"""Encapsulates actions on a kubernetes node."""
def __init__(
self,
fqdn: str,
api: KubernetesApiFactory,
dry_run: bool = True,
init_obj: Optional[kubernetes.client.models.v1_node.V1Node] = None,
):
"""Initialize the instance.
Arguments:
fqdn: the fqdn of the node.
api: the api factory we're going to use.
dry_run: if true, no write operations will happen.
init_obj: if not :py:data:`None`, this api object will be used, instead of fetching it from the api.
"""
self._api = api
self._fqdn = fqdn
self._dry_run = dry_run
if init_obj is not None:
if init_obj.metadata.name != self._fqdn:
raise KubernetesError(f"Mismatched names: got {init_obj.metadata.name}, expected {self._fqdn}")
self._node = init_obj
else:
# Get the object corresponding to the fqdn provided. If non-existent, it will fail.
self._node = self._get()
def is_schedulable(self) -> bool:
"""Checks if a node is schedulable or not.
Returns:
:py:data:`True` if payloads can be scheduled on the node, :py:data:`False` otherwise.
"""
return not (self._node.spec and self._node.spec.unschedulable)
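    # Note on the check above: a node whose spec is missing, or whose
    # spec.unschedulable is unset (None) or False, counts as schedulable; only an
    # explicit True (i.e. a cordoned node) makes it unschedulable.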
@property
def name(self) -> str:
"""The name of the node."""
return self._node.metadata.name
@property
def taints(self) -> list[V1Taint]:
"""The taints of the node."""
return self._node.spec.taints if self._node.spec.taints is not None else []
def cordon(self) -> None:
"""Makes the node unschedulable.
Raises:
spicerack.k8s.KubernetesApiError: if the call to the api failed.
spicerack.k8s.KubernetesCheckError: if the node wasn't set to unschedulable.
"""
if not self.is_schedulable():
logger.info("Node %s already cordoned", self.name)
return
if self._dry_run:
logger.info("Would have cordoned %s", self.name)
return
logger.info("Cordoning %s", self.name)
body = {"spec": {"unschedulable": True}}
self._node = self._patch(body)
# Now let's check if the object has changed in the api as expected.
if self.is_schedulable():
raise KubernetesCheckError(f"{self} is not unschedulable after trying to cordon it.")
def uncordon(self) -> None:
"""Makes a node schedulable.
Raises:
spicerack.k8s.KubernetesApiError: if the call to the api failed.
            spicerack.k8s.KubernetesCheckError: if the node wasn't set back to schedulable.
"""
if self.is_schedulable():
logger.info("Node %s already schedulable", self.name)
return
if self._dry_run:
logger.info("Would have uncordoned %s", self.name)
return
logger.info("Uncordoning %s", self.name)
body = {"spec": {"unschedulable": False}}
self._node = self._patch(body)
# Now let's check if the object has changed in the api as expected.
if not self.is_schedulable():
raise KubernetesCheckError(f"Node {self} is not schedulable after trying to uncordon it.")
def drain(self) -> None:
"""Drains the node, analogous to `kubectl drain`.
Raises:
            spicerack.k8s.KubernetesCheckError: if not all pods could be evicted or the node did not drain in time.
"""
unevictable: list["KubernetesPod"] = []
failed: list[tuple["KubernetesPod", KubernetesApiError]] = []
self.cordon()
max_grace_period = 0
for pod in self.get_pods():
try:
# Dry run is passed to the pods, so if we're in dry-run mode nothing will actually be evicted.
pod.evict()
except KubernetesError:
# pod.evict raises a KubernetesError if the pod is unevictable
unevictable.append(pod)
except KubernetesApiError as e:
failed.append((pod, e))
# Update the max grace period.
if not pod.is_terminated() and pod.spec.termination_grace_period_seconds > max_grace_period:
max_grace_period = pod.spec.termination_grace_period_seconds
if len(failed) > 0:
for p, exc in failed:
logger.error("Failed to evict pod %s from node %s: %s", p, self, exc)
raise KubernetesCheckError(f"Could not evict all pods from node {self}")
self._wait_for_empty(len(unevictable), max_grace_period)
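    # A minimal drain sketch (the node name is illustrative). With the default
    # dry_run=True nothing is cordoned or evicted, making a preview run safe:
    #
    #     node = Kubernetes("main", "staging").get_node("kubernetes1001.example.org")
    #     node.drain()  # cordon, evict what is evictable, wait for the node to empty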
def refresh(self) -> None:
"""Refresh the api object from the kubernetes api server."""
self._node = self._get()
def _get(self) -> kubernetes.client.models.v1_node.V1Node:
"""Get a node api object.
Arguments:
name: the name of the node.
"""
try:
nodes = self._api.core().list_node(field_selector=f"metadata.name={self._fqdn}")
nodes_found = len(nodes.items)
if nodes_found == 1:
return nodes.items[0]
if nodes_found == 0: # pylint: disable=no-else-raise
raise KubernetesError(f"Node {self._fqdn} not found")
else:
node_names = ",".join([o.metadata.name for o in nodes.items])
raise KubernetesError(f"More than one node found for name {self._fqdn}: {node_names}")
except kubernetes.client.exceptions.ApiException as exc:
raise KubernetesApiError(f"Failed to list nodes: {exc}") from exc
def _patch(self, body: dict[str, Any]) -> kubernetes.client.models.v1_node.V1Node:
"""Modify the node properties.
Arguments:
body: the modifications to the current node to send to the API.
"""
try:
return self._api.core().patch_node(self.name, body)
except kubernetes.client.exceptions.ApiException as exc:
raise KubernetesApiError(f"Failed to modify node: {exc}") from exc
def get_pods(self) -> list["KubernetesPod"]:
"""Get the pods running on this node."""
pods = []
try:
for obj in self._api.core().list_pod_for_all_namespaces(field_selector=f"spec.nodeName={self.name}").items:
p = KubernetesPod(
obj.metadata.namespace, obj.metadata.name, self._api, dry_run=self._dry_run, init_obj=obj
)
pods.append(p)
return pods
except kubernetes.client.exceptions.ApiException as exc:
raise KubernetesApiError(f"Failed to find pods running on node {self.name}: {exc}") from exc
def __str__(self) -> str:
"""String representation."""
return f"Node({self._fqdn})"
def _wait_for_empty(self, expected: int, max_grace_period: int) -> None:
"""Wait for all pods to be evicted.
Arguments:
            expected: the number of pods expected to remain on the node (the unevictable ones).
max_grace_period: how many seconds to sleep before starting to check.
"""
if self._dry_run:
logger.info("Would have waited for node %s to be empty", self.name)
return
def num_pods() -> int:
return len([p for p in self.get_pods() if not p.is_terminated()])
@retry(
tries=5,
backoff_mode="exponential",
exceptions=(KubernetesCheckError,),
failure_message="Waiting for pods to be evicted",
)
def wait() -> None:
"""Poll the number of pods to check if they match the expected ones."""
npods = num_pods()
if npods > expected:
raise KubernetesCheckError(f"Node {self.name} still has {npods} pods, expected {expected}")
# Wait for max grace period first, then retry 5 times with exponential backoff as pods need some time
# to actually terminate and API needs some time to catch up. Especially for nodes with a large number
# of pods.
#
# Please note that waiting for the max grace period is absolutely arbitrary and just what looks like
# a reasonable time to wait for the api to conform its view of the node to reality.
if num_pods() > expected:
logger.debug("Waiting %d seconds before checking evictions again", max_grace_period)
time.sleep(max_grace_period)
wait()
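        # A worked example with illustrative numbers: if the longest
        # terminationGracePeriodSeconds among the node's non-terminated pods was 30,
        # we sleep 30 seconds once, then poll up to 5 times with exponential backoff
        # until only the `expected` unevictable pods remain.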
class KubernetesPod:
"""Encapsulates actions on a kubernetes pod."""
def __init__(
self,
namespace: str,
name: str,
api: KubernetesApiFactory,
dry_run: bool = True,
init_obj: Optional[kubernetes.client.models.v1_pod.V1Pod] = None,
):
"""Initialize the pod isntance.
Arguments:
namespace: the namespace where the pod is located.
name: the name of the pod.
api: the api factory we're going to use.
dry_run: if true, no write operations will happen.
            init_obj: if not :py:data:`None`, this api object will be used, instead of fetching it from the api.
"""
self._api = api
self.name = name
self.namespace = namespace
self._dry_run = dry_run
if init_obj is not None:
if self.name != init_obj.metadata.name:
raise KubernetesError(f"Mismatched names: got {init_obj.metadata.name}, expected {self.name}")
if self.namespace != init_obj.metadata.namespace:
raise KubernetesError(
f"Mismatched namespaces: {init_obj.metadata.namespace}, expected {self.namespace}"
)
self._pod = init_obj
else:
self._pod = self._get()
@property
def controller(self) -> Optional[kubernetes.client.models.v1_owner_reference.V1OwnerReference]:
"""Get the reference to the controlling object, if any."""
ref = self._pod.metadata.owner_references
if ref is None or len(ref) == 0:
return None
return ref[0]
def is_daemonset(self) -> bool:
"""Checks if the pod is part of a daemonset."""
if self.controller is None:
return False
return self.controller.kind == "DaemonSet"
def is_terminated(self) -> bool:
"""Checks if the pod is terminated."""
return self._pod.status.phase in ["Succeeded", "Failed"]
def is_mirror(self) -> bool:
"""Check if the pod is a mirror pod."""
return "kubernetes.io/config.mirror" in self._pod.metadata.annotations
@property
def spec(self) -> kubernetes.client.models.v1_pod_spec.V1PodSpec:
"""Get the pod's spec."""
return self._pod.spec
def is_evictable(self) -> bool:
"""Check if the pod can be evicted."""
# We apply the logic found in kubectl:
# https://github.com/kubernetes/kubernetes/blob/release-1.16/staging/src/k8s.io/kubectl/pkg/drain/filters.go
# Check zero: a terminated pod is always evictable.
if self.is_terminated():
return True
# Check one: the pod is orphaned and not finished
if self.controller is None:
logger.warning("Pod %s is orphaned, not evictable", self)
return False
# Check two: the pod is a daemonset
if self.is_daemonset():
logger.warning("Pod %s is a daemonset, not evictable", self)
return False
# Check three: the pod is a mirror
if self.is_mirror():
logger.warning("Pod %s is a mirror pod, not evictable", self)
return False
return True
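    # Summarizing the checks above: a pod is evictable if it is already terminated,
    # or if it has a controller and is neither part of a DaemonSet nor a mirror
    # pod. This mirrors kubectl drain's filters, without kubectl's --force and
    # --ignore-daemonsets overrides, which this module does not implement.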
def evict(self) -> None:
"""Submit an eviction request to the kubernetes api for this pod.
Raises:
spicerack.k8s.KubernetesApiTooManyRequests: in case of a persistent HTTP 429 from the server.
spicerack.k8s.KubernetesApiError: in case of a bad response from the server.
spicerack.k8s.KubernetesError: if the pod is not evictable.
"""
if not self.is_evictable():
raise KubernetesError(f"Pod {self} is not evictable.")
if self._dry_run:
logger.info("Would have evicted %s", self)
return
logger.debug("Evicting pod %s", self)
body = kubernetes.client.V1beta1Eviction(metadata=client.V1ObjectMeta(name=self.name, namespace=self.namespace))
@retry(
tries=5,
backoff_mode="exponential",
exceptions=(KubernetesApiTooManyRequests,),
failure_message=f"Retrying eviction of {self}. API error was",
)
def retry_evict() -> None:
"""Evict the pod."""
try:
self._api.core().create_namespaced_pod_eviction(self.name, self.namespace, body)
except kubernetes.client.exceptions.ApiException as e:
# The eviction is not currently allowed because of a PodDisruptionBudget or
# we hit an API rate limit.
# In both cases we should retry the eviction.
if e.status == HTTPStatus.TOO_MANY_REQUESTS:
logger.info("Failed to evict pod %s - HTTP response body: %s", self, e.body)
raise KubernetesApiTooManyRequests(e) from e
raise KubernetesApiError(e) from e
retry_evict()
def _get(self) -> kubernetes.client.models.v1_pod.V1Pod:
"""Get the object from the api."""
try:
            # By convention, we have a read-only user with the same name as the namespace.
return self._api.core(user=self.namespace).read_namespaced_pod(self.name, self.namespace)
except kubernetes.client.exceptions.ApiException as e:
raise KubernetesApiError(f"Error from the kubernetes api: {e}") from e
def refresh(self) -> None:
"""Refresh the api object from the kubernetes api server."""
self._pod = self._get()
def __str__(self) -> str:
"""String representation."""
return f"Pod({self.namespace}/{self.name})"