Source code for scap.checks

# -*- coding: utf-8 -*-
"""
    scap.checks
    ~~~~~~~~~~~
    Deployment checks.

    Definitions are typically loaded from YAML of the following format:

        checks:
          some_unique_check_name:
            type: command
            command: /usr/local/bin/my_special_check
            stage: promote

          some_other_check_name:
            type: nrpe
            command: some_parsed_nrpe_command_name
            stage: promote

    Copyright © 2014-2017 Wikimedia Foundation and Contributors.

    This file is part of Scap.

    Scap is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, version 3.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
import collections
import os
import select
import shlex
import subprocess
import time

import scap.utils as utils


_TYPES = {}


[docs]class CheckInvalid(AssertionError): pass
[docs]def checktype(type_of_check): """ Class decorator for registering a new check type. :param type_of_check: type name """ def decorator(klass): register_type(type_of_check, klass) return klass return decorator
[docs]def execute(checks, logger, concurrency=1): """ Execute the given checks in parallel. :param checks: iterable of `checks.Check` objects, or a single `checks.Check` object. :param logger: `logging.Logger` to send messages to :param concurrency: level of concurrency :returns: tuple of the aggregate check success and list of executed checks :rtype: (bool, list) """ if isinstance(checks, Check): checks = [checks] epoll = select.epoll() todo = checks[::-1] doing = {} done = [] success = 0 def handle_failure(job, msg, kill=True): logger.warning(msg) handle_done(job) if kill: try: job.kill() except OSError: pass def handle_done(job): if job.fd in doing: del doing[job.fd] try: while todo or doing: # Schedule new jobs up to the concurrency level while todo and len(doing) < concurrency: check = todo.pop() logger.info("Executing check '{}'".format(check.name)) job = check.run() doing[job.fd] = job # Note: we do not call epoll.unregister() since the call to # Proc.communicate() in CheckJob.wait() closes the file # descriptor. epoll.register(job.fd, select.EPOLLIN) # Poll for stdout events for fd, event in epoll.poll(0.01): job = doing[fd] # Handle job completion if job.poll() is not None: job.wait() if job.isfailure(): msg = "Check '{}' failed: {}" msg = msg.format(job.check.name, job.output) handle_failure(job, msg, kill=False) else: msg = "Check '{}' completed, output: {}".format( job.check.name, job.output ) logger.debug(msg) handle_done(job) success += 1 done.append(job) # Enforce timeout on running jobs for job in list(doing.values()): if job.timedout(): msg = "Check '{}' exceeded {}s timeout" msg = msg.format(job.check.name, job.check.timeout) handle_failure(job, msg) finally: for job in doing.values(): msg = "Error running check '{}'".format(job.check.name) handle_failure(job, msg) epoll.close() return (len(done) == len(checks) == success, done)
[docs]def load(cfg, environment=None): """ Load checks from the given config dict. :param cfg: config dict :param environment: environment in which to execute checks """ checks = collections.OrderedDict() if cfg and cfg.get("checks", None): for name, options in cfg["checks"].items(): check_type = options.get("type", "command") if not options: check_type = "override" if check_type not in _TYPES: msg = "unknown check type '{}'".format(check_type) raise CheckInvalid(msg) checks[name] = _TYPES[check_type]( name=name, environment=environment, **options ) return checks
[docs]def register_type(check_type, factory): """ Register a new check type and factory. :param check_type: type name :param factory: callable type factory """ _TYPES[check_type] = factory
[docs]@checktype("command") class Check(object): """ Represent a loaded 'command' check. :param name: check name :param stage: (deprecated: use "after", ignored when "after" is set) stage after which to run the check :param before: stage before which to run the check :param after: stage after which to run the check :param environment: environment in which to run checks :param group: deploy group for which to run the check :param timeout: maximum time allowed for check execution, in seconds :param command: check command to run :param shell: If True, a shell is used to execute "command". """
[docs] def __init__( self, name, stage=None, before=None, after=None, environment=None, group=None, timeout=30.0, command="", shell=False, **opts ): self.name = name self.environment = environment self.before = before self.after = after or stage self.group = group self.timeout = timeout self.command = command self.shell = shell self.options = opts if self.environment is None: self.environment = os.environ.copy() # Avoid TypeError exceptions later on in Popen -> fsencode by removing # None values from the environment self.environment = {k: v for k, v in self.environment.items() if v is not None}
@property def stage(self): return self.after or self.before
[docs] def run(self): """Return a running :class:`CheckJob`.""" return CheckJob(self)
[docs] def validate(self): """Validate check properties.""" if not self.command: raise CheckInvalid("missing 'command'")
[docs] def __repr__(self): stages = "" if self.stage: stages += "stage: %s" % self.stage if self.before: stages += "before: %s" % self.before if self.after: stages += "after: %s" % self.after return "<Check %s %s>" % (self.name, stages)
[docs]class CheckJob(object): """ Represent and control a running check. A :class:`CheckJob` begins execution immediately and should be controlled within some kind of poll loop, typically `checks.execute`. """
[docs] def __init__(self, check): """Inititalizes a new CheckJob and begins execution.""" self.check = check cmd = check.command if check.shell else shlex.split(check.command) self.proc = subprocess.Popen( cmd, env=check.environment, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=check.shell, ) self.fd = self.proc.stdout.fileno() self.stream = self.proc.stdout self.output = "" self.started = time.time() self.ended = None
[docs] def duration(self): """Return the current or final job duration.""" ended = time.time() if self.ended is None else self.ended return ended - self.started
[docs] def isfailure(self): """Return whether this check failed.""" return self.proc.returncode != 0
[docs] def kill(self): """Kill the executing process.""" self.proc.kill() self.proc.communicate()
[docs] def poll(self): """ Read output and polls the process for exit status. If the process has exited, an (approximate) end time is recorded. This method is non-blocking typically called within an event loop like `checks.execute`. """ self.output += utils.eintr_retry(os.read, self.fd, 1048576).decode("utf-8") result = utils.eintr_retry(self.proc.poll) if result is not None: self.ended = time.time() return result
[docs] def timedout(self): """Whether the job duration has exceeded the job timeout.""" return self.duration() > self.check.timeout
[docs] def wait(self): """ Block for the last stdout/stderr read of the check process. To ensure the least amount of blocking, this method should only be called within an event loop once `poll` has signaled that the process has exited. """ # Note: communicate() closes the file descriptor after reading from it. # Closed file descriptors are automatically removed from the epoll set # by the kernel. for output in self.proc.communicate(): if output is not None: self.output += output.decode("utf-8")
[docs]@checktype("override") class OverrideCheck(object): """Represent a loaded 'override' check."""
[docs] def __init__(self, name, environment): """Initialize override check.""" self.name = name self.environment = environment
@property def stage(self): utils.get_logger().info("Check %s is empty and will not be run", self.name)