server/subcommand.py

__author__ = """Copyright Andy Whitcroft, Martin J. Bligh - 2006, 2007"""

import sys, os, time, signal, cPickle, logging

from autotest.client.shared import error, utils


# entry points that use subcommand must set this to their logging manager
# to get log redirection for subcommands
logging_manager_object = None


def parallel(tasklist, timeout=None, return_results=False):
    """
    Run a set of predefined subcommands in parallel.

    @param tasklist: A list of subcommand instances to execute.
    @param timeout: Number of seconds after which the commands should timeout.
    @param return_results: If True instead of an AutoServError being raised
            on any error a list of the results|exceptions from the tasks is
            returned.  [default: False]
    """
    run_error = False
    for task in tasklist:
        task.fork_start()

    remaining_timeout = None
    if timeout:
        endtime = time.time() + timeout

    results = []
    for task in tasklist:
        if timeout:
            remaining_timeout = max(endtime - time.time(), 1)
        try:
            status = task.fork_waitfor(timeout=remaining_timeout)
        except error.AutoservSubcommandError:
            run_error = True
        else:
            if status != 0:
                run_error = True

        results.append(cPickle.load(task.result_pickle))
        task.result_pickle.close()

    if return_results:
        return results
    elif run_error:
        message = 'One or more subcommands failed:\n'
        for task, result in zip(tasklist, results):
            message += 'task: %s returned/raised: %r\n' % (task, result)
        raise error.AutoservError(message)


def parallel_simple(function, arglist, log=True, timeout=None,
                    return_results=False):
    """
    Each element in the arglist used to create a subcommand object,
    where that arg is used both as a subdir name, and a single argument
    to pass to "function".

    We create a subcommand object for each element in the list,
    then execute those subcommand objects in parallel.

    NOTE: As an optimization, if len(arglist) == 1 a subcommand is not used.

    @param function: A callable to run in parallel once per arg in arglist.
    @param arglist: A list of single arguments to be used one per subcommand;
            typically a list of machine names.
    @param log: If True, output will be written to output in a subdirectory
            named after each subcommand's arg.
    @param timeout: Number of seconds after which the commands should timeout.
    @param return_results: If True instead of an AutoServError being raised
            on any error a list of the results|exceptions from the function
            called on each arg is returned.  [default: False]

    @returns None or a list of results/exceptions.
    """
    if not arglist:
        logging.warn('parallel_simple was called with an empty arglist, '
                     'did you forget to pass in a list of machines?')
    # Bypass the multithreading if only one machine.
    if len(arglist) == 1:
        arg = arglist[0]
        if return_results:
            try:
                result = function(arg)
            except Exception, e:
                return [e]
            return [result]
        else:
            function(arg)
            return

    subcommands = []
    for arg in arglist:
        args = [arg]
        if log:
            subdir = str(arg)
        else:
            subdir = None
        subcommands.append(subcommand(function, args, subdir))
    return parallel(subcommands, timeout, return_results=return_results)


class subcommand(object):
    fork_hooks, join_hooks = [], []

    def __init__(self, func, args, subdir = None):
        # func(args) - the subcommand to run
        # subdir     - the subdirectory to log results in
        if subdir:
            self.subdir = os.path.abspath(subdir)
            if not os.path.exists(self.subdir):
                os.mkdir(self.subdir)
            self.debug = os.path.join(self.subdir, 'debug')
            if not os.path.exists(self.debug):
                os.mkdir(self.debug)
        else:
            self.subdir = None
            self.debug = None

        self.func = func
        self.args = args
        self.lambda_function = lambda: func(*args)
        self.pid = None
        self.returncode = None


    def __str__(self):
        return str('subcommand(func=%s,  args=%s, subdir=%s)' %
                   (self.func, self.args, self.subdir))


    @classmethod
    def register_fork_hook(cls, hook):
        """ Register a function to be called from the child process after
        forking. """
        cls.fork_hooks.append(hook)


    @classmethod
    def register_join_hook(cls, hook):
        """ Register a function to be called when from the child process
        just before the child process terminates (joins to the parent). """
        cls.join_hooks.append(hook)


    def redirect_output(self):
        if self.subdir and logging_manager_object:
            tag = os.path.basename(self.subdir)
            logging_manager_object.tee_redirect_debug_dir(self.debug, tag=tag)


    def fork_start(self):
        sys.stdout.flush()
        sys.stderr.flush()
        r, w = os.pipe()
        self.returncode = None
        self.pid = os.fork()

        if self.pid:                            # I am the parent
            os.close(w)
            self.result_pickle = os.fdopen(r, 'r')
            return
        else:
            os.close(r)

        # We are the child from this point on. Never return.
        signal.signal(signal.SIGTERM, signal.SIG_DFL) # clear handler
        if self.subdir:
            os.chdir(self.subdir)
        self.redirect_output()

        try:
            for hook in self.fork_hooks:
                hook(self)
            result = self.lambda_function()
            os.write(w, cPickle.dumps(result, cPickle.HIGHEST_PROTOCOL))
            exit_code = 0
        except Exception, e:
            logging.exception('function failed')
            exit_code = 1
            os.write(w, cPickle.dumps(e, cPickle.HIGHEST_PROTOCOL))

        os.close(w)

        try:
            for hook in self.join_hooks:
                hook(self)
        finally:
            sys.stdout.flush()
            sys.stderr.flush()
            os._exit(exit_code)


    def _handle_exitstatus(self, sts):
        """
        This is partially borrowed from subprocess.Popen.
        """
        if os.WIFSIGNALED(sts):
            self.returncode = -os.WTERMSIG(sts)
        elif os.WIFEXITED(sts):
            self.returncode = os.WEXITSTATUS(sts)
        else:
            # Should never happen
            raise RuntimeError("Unknown child exit status!")

        if self.returncode != 0:
            print "subcommand failed pid %d" % self.pid
            print "%s" % (self.func,)
            print "rc=%d" % self.returncode
            print
            if self.debug:
                stderr_file = os.path.join(self.debug, 'autoserv.stderr')
                if os.path.exists(stderr_file):
                    for line in open(stderr_file).readlines():
                        print line,
            print "\n--------------------------------------------\n"
            raise error.AutoservSubcommandError(self.func, self.returncode)


    def poll(self):
        """
        This is borrowed from subprocess.Popen.
        """
        if self.returncode is None:
            try:
                pid, sts = os.waitpid(self.pid, os.WNOHANG)
                if pid == self.pid:
                    self._handle_exitstatus(sts)
            except os.error:
                pass
        return self.returncode


    def wait(self):
        """
        This is borrowed from subprocess.Popen.
        """
        if self.returncode is None:
            pid, sts = os.waitpid(self.pid, 0)
            self._handle_exitstatus(sts)
        return self.returncode


    def fork_waitfor(self, timeout=None):
        if not timeout:
            return self.wait()
        else:
            end_time = time.time() + timeout
            while time.time() <= end_time:
                returncode = self.poll()
                if returncode is not None:
                    return returncode
                time.sleep(1)

            utils.nuke_pid(self.pid)
            print "subcommand failed pid %d" % self.pid
            print "%s" % (self.func,)
            print "timeout after %ds" % timeout
            print
            return None