diff --git a/agents/heuristics_resource/fence_heuristics_resource.py b/agents/heuristics_resource/fence_heuristics_resource.py
new file mode 100755
index 000000000..13adfdd49
--- /dev/null
+++ b/agents/heuristics_resource/fence_heuristics_resource.py
@@ -0,0 +1,255 @@
+#!@PYTHON@ -tt
+
+import io
+import re
+import subprocess
+import shlex
+import sys, stat
+import logging
+import atexit
+import time
+import xml.etree.ElementTree as ET
+sys.path.append("/usr/share/fence")
+from fencing import fail_usage, run_command, fence_action, all_opt
+from fencing import atexit_handler, check_input, process_input, show_docs
+from fencing import run_delay
+
+# Roles accepted for the promoted instance of a promotable clone. "Master"
+# is the name the agent was written against; newer Pacemaker releases
+# report the same role as "Promoted".
+PROMOTED_ROLES = ("Master", "Promoted")
+
+# Spellings treated as boolean true when reading crm_mon XML attributes.
+TRUE_VALUES = ("y", "yes", "t", "true", "on", "1")
+
+def str_to_bool(value):
+    """Return True when an XML attribute value spells a true boolean.
+
+    Stands in for distutils.util.strtobool: distutils was removed in
+    Python 3.12, and strtobool raised when the attribute was missing
+    (None). None and unrecognised text are both treated as False.
+    """
+    if value is None:
+        return False
+    return str(value).strip().lower() in TRUE_VALUES
+
+def heuristics_resource(con, options):
+    """Delay fencing when the local node is the standby (SBY) node.
+
+    Finds where the resource named by --resource is running. When it is
+    not failed and active only on one other member node, this node is the
+    SBY node and the call sleeps for --standby-wait seconds. No wait is
+    made when this node is the fencing target, when crm_node/crm_mon
+    output cannot be obtained or parsed, or when the resource is missing,
+    ambiguous, failed or of an unsupported type.
+
+    con     -- not used by this function.
+    options -- option dictionary; reads --nodename, --resource,
+               --standby-wait, --crm-node-path and --crm-mon-path.
+
+    Always returns False.
+    """
+    if not options.get("--nodename"):
+        logging.error("nodename parameter required")
+        return False
+
+    if not options.get("--resource"):
+        logging.error("resource parameter required")
+        return False
+
+    target = options["--nodename"]
+    resource_id = options["--resource"]
+    crm_node_path = options["--crm-node-path"]
+    crm_mon_path = options["--crm-mon-path"]
+
+    # A non-numeric value must not end the agent with a traceback, and a
+    # negative one would make time.sleep() raise ValueError.
+    try:
+        wait_time = int(options["--standby-wait"])
+    except (TypeError, ValueError):
+        wait_time = -1
+    if wait_time < 0:
+        logging.error("Invalid standby-wait value: '%s'" % options["--standby-wait"])
+        return False
+
+    (rc, out, err) = run_command(options, "%s --name" % crm_node_path)
+    if rc != 0 or out is None:
+        logging.error("Can not get my nodename. rc=%s, stderr=%s" % (rc, err))
+        return False
+
+    node = out.strip()
+
+    if node == target:
+        logging.info("Skip standby wait due to self-fencing.")
+        return False
+
+    (rc, out, err) = run_command(options, "%s --as-xml" % crm_mon_path)
+    if rc != 0 or out is None:
+        logging.error("crm_mon command failed. rc=%s, stderr=%s" % (rc, err))
+        return False
+
+    try:
+        tree = ET.fromstring(out)
+    except ET.ParseError as e:
+        logging.error("Can not parse crm_mon output: %s" % e)
+        return False
+
+    members = tree.findall('./nodes//*[@type="member"]')
+    nodelist = [member.get("name") for member in members]
+
+    # Compare ids in Python instead of formatting resource_id into the
+    # XPath, where a quote character in the id would break the expression.
+    resources = [elem for elem in tree.findall('./resources//*')
+                 if elem.get("id") == resource_id]
+    if not resources:
+        logging.error("Resource '%s' not found." % resource_id)
+    elif len(resources) > 1:
+        logging.error("Multiple active resources found.")
+    elif active_on_peer_only(resources[0], node, nodelist):
+        return standby_wait(wait_time)
+
+    logging.info("Skip standby wait.")
+    return False
+
+def active_on_peer_only(resource, node, nodelist):
+    """Return True if resource is not failed and active only on a peer.
+
+    resource -- element found under <resources> in the crm_mon XML.
+    node     -- name of the local node.
+    nodelist -- names of the cluster member nodes.
+
+    Handles primitives, groups and promotable clones with exactly one
+    promoted instance; any other element is logged as unsupported.
+    """
+    tag = resource.tag
+    if tag == "resource":
+        # primitive resource
+        return (check_standby_node(resource, node, nodelist)
+                and not check_failed_attrib(resource))
+    if tag == "group":
+        # resource group: every member has to pass both checks; an empty
+        # group gives no evidence that anything runs on a peer
+        members = list(resource)
+        return (bool(members)
+                and all(check_standby_node(m, node, nodelist) for m in members)
+                and not any(check_failed_attrib(m) for m in members))
+    if tag == "clone" and str_to_bool(resource.get("multi_state")):
+        # promotable resource: only the promoted instance has to be on a
+        # peer, but a failure of any instance cancels the wait
+        instances = list(resource)
+        promoted = [i for i in instances if i.get("role") in PROMOTED_ROLES]
+        return (len(promoted) == 1
+                and check_standby_node(promoted[0], node, nodelist)
+                and not any(check_failed_attrib(i) for i in instances))
+    # clone or bundle resource
+    logging.error("Unsupported resource type: '%s'" % tag)
+    return False
+
+def standby_wait(wait_time):
+    """Sleep wait_time seconds on the SBY node, then return False."""
+    logging.info("Standby wait %s sec" % wait_time)
+    time.sleep(wait_time)
+    return False
+
+def check_failed_attrib(resource):
+    """Return True if resource is marked failed and the failure is not ignored."""
+    failed = str_to_bool(resource.get("failed"))
+    ignored = str_to_bool(resource.get("failure_ignored"))
+    return failed and not ignored
+
+def check_standby_node(resource, nodename, nodelist):
+    """Return True if resource runs on exactly one member node other than nodename.
+
+    The "name" attributes of resource's child elements are taken as the
+    nodes it is running on.
+    """
+    running_nodes = set(child.get("name") for child in resource)
+    if len(running_nodes) != 1:
+        return False
+    running_node = running_nodes.pop()
+    return running_node in nodelist and running_node != nodename
+
+def define_new_opts():
+    """Add this agent's own options to all_opt."""
+    all_opt["nodename"] = {
+        "getopt" : "n:",
+        "longopt" : "nodename",
+        "required" : "1",
+        "help" : "-n, --nodename=[nodename] Name of node to be fenced",
+        "shortdesc" : "Name of node to be fenced",
+        "default" : "",
+        "order" : 1
+    }
+    all_opt["resource"] = {
+        "getopt" : "r:",
+        "longopt" : "resource",
+        "required" : "1",
+        "help" : "-r, --resource=[resource-id] ID of the resource that should be running on the ACT node. It does not make sense to specify a cloned or bundled resource unless it is promotable and has only a single master instance.",
+        "shortdesc" : "Resource ID. It does not make sense to specify a cloned or bundled resource unless it is promotable and has only a single master instance.",
+        "default" : "",
+        "order" : 1
+    }
+    all_opt["standby_wait"] = {
+        "getopt" : "w:",
+        "longopt" : "standby-wait",
+        "required" : "0",
+        "help" : "-w, --standby-wait=[seconds] Wait X seconds on SBY node. The agent will delay but not succeed.",
+        "shortdesc" : "Wait X seconds on SBY node. The agent will delay but not succeed.",
+        "default" : "5",
+        "order" : 1
+    }
+    all_opt["crm_mon_path"] = {
+        "getopt" : ":",
+        "longopt" : "crm-mon-path",
+        "required" : "0",
+        "help" : "--crm-mon-path=[path] Path to crm_mon",
+        "shortdesc" : "Path to crm_mon command",
+        "default" : "@CRM_MON_PATH@",
+        "order" : 1
+    }
+    all_opt["crm_node_path"] = {
+        "getopt" : ":",
+        "longopt" : "crm-node-path",
+        "required" : "0",
+        "help" : "--crm-node-path=[path] Path to crm_node",
+        "shortdesc" : "Path to crm_node command",
+        "default" : "@CRM_NODE_PATH@",
+        "order" : 1
+    }
+
+
+def main():
+    """Build the options and docs, then run heuristics_resource through fence_action."""
+    device_opt = ["no_status", "no_password", "nodename", "resource", "standby_wait", "crm_mon_path", "crm_node_path", "method"]
+    define_new_opts()
+    atexit.register(atexit_handler)
+
+    all_opt["method"]["default"] = "cycle"
+    all_opt["method"]["help"] = "-m, --method=[method] Method to fence (cycle|onoff) (Default: cycle)"
+
+    options = check_input(device_opt, process_input(device_opt))
+
+    docs = {}
+    docs["shortdesc"] = "Fence agent for resource-heuristic based fencing delay"
+    docs["longdesc"] = "fence_heuristics_resource uses resource-heuristics to delay execution of fence agent running on next level.\
+\n.P\n\
+This is not a fence agent by itself! \
+Its only purpose is to delay execution of another fence agent that lives on next fencing level. \
+Note that this agent always returns FALSE. Therefore, subsequent agents on the same fencing level will not run"
+    docs["vendorurl"] = ""
+    show_docs(options, docs)
+
+    run_delay(options)
+
+    result = fence_action(
+                None,
+                options,
+                None,
+                None,
+                reboot_cycle_fn = heuristics_resource,
+                sync_set_power_fn = heuristics_resource)
+
+    sys.exit(result)
+
+if __name__ == "__main__":
+    main()
diff --git a/configure.ac b/configure.ac
index 9b88d5f62..b40f9a93f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -279,6 +279,8 @@ AC_PATH_PROG([SNMPSET_PATH], [snmpset], [/usr/bin/snmpset])
 AC_PATH_PROG([SNMPGET_PATH], [snmpget], [/usr/bin/snmpget])
 AC_PATH_PROG([NOVA_PATH], [nova], [/usr/bin/nova])
 AC_PATH_PROG([POWERMAN_PATH], [powerman], [/usr/bin/powerman])
+AC_PATH_PROG([CRM_MON_PATH], [crm_mon], [/usr/sbin/crm_mon])
+AC_PATH_PROG([CRM_NODE_PATH], [crm_node], [/usr/sbin/crm_node])
 AC_PATH_PROG([PING_CMD], [ping])
 AC_PATH_PROG([PING6_CMD], [ping6])
 
diff --git a/fence-agents.spec.in b/fence-agents.spec.in
index 9be8a9440..aed2f97b1 100644
--- a/fence-agents.spec.in
+++ b/fence-agents.spec.in
@@ -50,6 +50,7 @@ fence-agents-emerson \\
 fence-agents-eps \\
 fence-agents-hds-cb \\
 fence-agents-heuristics-ping \\
+fence-agents-heuristics-resource \\
 fence-agents-hpblade \\
 fence-agents-ibmblade \\
 fence-agents-ifmib \\
@@ -536,6 +537,19 @@ ping-heuristics.
 %{_sbindir}/fence_heuristics_ping
 %{_mandir}/man8/fence_heuristics_ping.8*
 
+%package heuristics-resource
+License: GPLv2+ and LGPLv2+
+Summary: Pseudo fence agent to affect other agents based on resource-heuristics
+Requires: fence-agents-common = %{version}-%{release}
+BuildArch: noarch
+Obsoletes: fence-agents
+%description heuristics-resource
+Fence pseudo agent used to affect other agents based on
+resource-heuristics.
+%files heuristics-resource +%{_sbindir}/fence_heuristics_resource +%{_mandir}/man8/fence_heuristics_resource.8* + %package hpblade License: GPLv2+ and LGPLv2+ Summary: Fence agent for HP BladeSystem devices diff --git a/make/fencebuild.mk b/make/fencebuild.mk index 819e03e6b..a552d74f1 100644 --- a/make/fencebuild.mk +++ b/make/fencebuild.mk @@ -28,6 +28,8 @@ define gen_agent_from_py -e 's#@''SNMPGET_PATH@#${SNMPGET_PATH}#g' \ -e 's#@''NOVA_PATH@#${NOVA_PATH}#g' \ -e 's#@''POWERMAN_PATH@#${POWERMAN_PATH}#g' \ + -e 's#@''CRM_MON_PATH@#${CRM_MON_PATH}#g' \ + -e 's#@''CRM_NODE_PATH@#${CRM_NODE_PATH}#g' \ -e 's#@''PING_CMD@#${PING_CMD}#g' \ -e 's#@''PING6_CMD@#${PING6_CMD}#g' \ -e 's#@''PING4_CMD@#${PING4_CMD}#g' \ diff --git a/tests/data/metadata/fence_heuristics_resource.xml b/tests/data/metadata/fence_heuristics_resource.xml new file mode 100644 index 000000000..4ac693cfb --- /dev/null +++ b/tests/data/metadata/fence_heuristics_resource.xml @@ -0,0 +1,114 @@ + + +fence_heuristics_resource uses resource-heuristics to delay execution of fence agent running on next level. + +This is not a fence agent by itself! Its only purpose is to delay execution of another fence agent that lives on next fencing level. Note that this agent always returns FALSE. Therefore, subsequent agents on the same fencing level will not run + + + + + + Fencing action + + + + Path to crm_mon command + + + + Path to crm_node command + + + + + + Method to fence + + + + + Name of node to be fenced + + + + + Resource ID. It does not make sense to specify a cloned or bundled resource unless it is promotable and has only a single master instance. + + + + + Wait X seconds on SBY node. The agent will delay but not succeed. + + + + + Disable logging to stderr. Does not affect --verbose or --debug-file or logging to syslog. 
+ + + + + Verbose mode + + + + + Write debug information to given file + + + + + Write debug information to given file + + + + + Display version information and exit + + + + + Display help and exit + + + + + Wait X seconds before fencing is started + + + + + Wait X seconds for cmd prompt after login + + + + + Test X seconds for status change after ON/OFF + + + + + Wait X seconds after issuing ON/OFF + + + + + Wait X seconds for cmd prompt after issuing command + + + + + Count of attempts to retry power on + + + + + + + + + + + +