Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fence_heuristics_resource: add new fence-agent for dynamic delay fencing #308

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions agents/heuristics_resource/fence_heuristics_resource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/usr/libexec/platform-python -tt
oalbrigt marked this conversation as resolved.
Show resolved Hide resolved

import io
import re
import subprocess
import shlex
import sys, stat
import logging
import os
import atexit
import time
sys.path.append("/usr/share/fence")
from fencing import fail_usage, run_command, fence_action, all_opt
from fencing import atexit_handler, check_input, process_input, show_docs
from fencing import run_delay

def heuristics_resource(con, options):
    """Decide whether this node is allowed to go on with fencing.

    Runs ``<crm-resource-path> -r <resource> -W`` and looks for an output
    line that ends with whitespace followed by this host's name.

    Args:
        con: Unused; present only because the fencing library passes a
            connection object to its callbacks.
        options: Parsed agent options. Reads ``--action``, ``--resource``,
            ``--crm-resource-path`` and ``--standby-wait``.

    Returns:
        True when the action is "on", when the resource is reported on this
        node, or (after sleeping ``--standby-wait`` seconds) when that wait
        is positive. False when the resource option is missing or empty,
        when the command cannot be started, or when the resource is not
        found here and no standby wait is configured.
    """
    if options["--action"] == "on":
        return True

    if "--resource" not in options or options["--resource"] == "":
        logging.error("resource parameter required")
        return False

    resource = options["--resource"]
    standby_wait = int(options["--standby-wait"])

    # Build argv as a list so a resource id containing spaces or quotes is
    # passed as exactly one argument. The configured path is still split so
    # that a path carrying extra arguments keeps working.
    argv = shlex.split(options["--crm-resource-path"]) + ["-r", resource, "-W"]

    # NOTE(review): Pacemaker node names do not have to equal uname; a
    # reviewer suggested resolving the name with `crm_node --name` instead.
    # The host name is escaped so that dots in an FQDN match literally.
    node_pattern = re.compile(r"\s%s$" % re.escape(os.uname()[1]))

    logging.info("Running command: %s", " ".join(argv))
    try:
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    except OSError:
        logging.error("Command failed on OS level")
        return False

    # communicate() drains stdout while waiting. Calling wait() first can
    # block forever once the child has filled the pipe buffer.
    output, _ = proc.communicate()

    if proc.returncode == 0:
        for line in output.decode(errors="replace").splitlines():
            if node_pattern.search(line.strip()):
                # This node is ACT! Continue fencing.
                return True
        logging.info("Resource %s NOT found on this node", resource)
    else:
        logging.error("Command failed. rc=%s", proc.returncode)

    if standby_wait > 0:
        # The SBY node gives the ACT node time to fence it first, and only
        # tries to fence the ACT node once it wakes up from this sleep.
        logging.info("Standby wait %s sec", standby_wait)
        time.sleep(standby_wait)
        return True

    return False


def define_new_opts():
    """Register this agent's three extra options in the shared ``all_opt`` table."""
    wait_text = "Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits."

    # (all_opt key, long option, required flag, help text, short description, default)
    extra_opts = (
        ("resource", "resource", "1",
         "--resource=[resource-id] ID of the resource that should be running in the ACT node",
         "Resource ID", ""),
        ("standby_wait", "standby-wait", "0",
         "--standby-wait=[seconds] " + wait_text,
         wait_text, "0"),
        ("crm_resource_path", "crm-resource-path", "0",
         "--crm-resource-path=[path] Path to crm_resource",
         "Path to crm_resource", "@CRM_RESOURCE_PATH@"),
    )

    for key, longopt, required, help_text, shortdesc, default in extra_opts:
        # Every option takes a value (":") and shares the same ordering slot.
        all_opt[key] = {
            "getopt": ":",
            "longopt": longopt,
            "required": required,
            "help": help_text,
            "shortdesc": shortdesc,
            "default": default,
            "order": 1,
        }


def main():
    """Parse the agent's options, emit docs if requested, and run the fence action.

    Exits the process with the value returned by ``fence_action``.
    """
    supported_opts = [
        "no_status",
        "no_password",
        "resource",
        "standby_wait",
        "crm_resource_path",
        "method",
    ]
    define_new_opts()
    atexit.register(atexit_handler)

    # Override the library's "method" option so this agent defaults to "cycle".
    method_opt = all_opt["method"]
    method_opt["default"] = "cycle"
    method_opt["help"] = "-m, --method=[method] Method to fence (cycle|onoff) (Default: cycle)"

    options = check_input(supported_opts, process_input(supported_opts))

    docs = {
        "shortdesc": "Fence agent for resource-heuristic based fencing",
        "longdesc": (
            "fence_heuristics_resource uses resource-heuristics to control execution of another fence agent on the same fencing level."
            "\n.P\n"
            "This is not a fence agent by itself! "
            "Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristic_resource."
        ),
        "vendorurl": "",
    }
    show_docs(options, docs)

    run_delay(options)

    # The same heuristic serves as both the reboot-cycle callback and the
    # synchronous set-power callback; no connection or status callbacks are
    # passed.
    exit_code = fence_action(
        None,
        options,
        None,
        None,
        reboot_cycle_fn=heuristics_resource,
        sync_set_power_fn=heuristics_resource,
    )

    sys.exit(exit_code)

# Run the agent only when this file is executed as a script.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ AC_PATH_PROG([SNMPSET_PATH], [snmpset], [/usr/bin/snmpset])
AC_PATH_PROG([SNMPGET_PATH], [snmpget], [/usr/bin/snmpget])
AC_PATH_PROG([NOVA_PATH], [nova], [/usr/bin/nova])
AC_PATH_PROG([POWERMAN_PATH], [powerman], [/usr/bin/powerman])
AC_PATH_PROG([CRM_RESOURCE_PATH], [crm_resource], [/usr/sbin/crm_resource])

AC_PATH_PROG([PING_CMD], [ping])
AC_PATH_PROG([PING6_CMD], [ping6])
Expand Down
14 changes: 14 additions & 0 deletions fence-agents.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ fence-agents-emerson \\
fence-agents-eps \\
fence-agents-hds-cb \\
fence-agents-heuristics-ping \\
fence-agents-heuristics-resource \\
fence-agents-hpblade \\
fence-agents-ibmblade \\
fence-agents-ifmib \\
Expand Down Expand Up @@ -536,6 +537,19 @@ ping-heuristics.
%{_sbindir}/fence_heuristics_ping
%{_mandir}/man8/fence_heuristics_ping.8*

%package heuristics-resource
License: GPLv2+ and LGPLv2+
Summary: Pseudo fence agent to affect other agents based on resource-heuristics
Requires: fence-agents-common = %{version}-%{release}
BuildArch: noarch
Obsoletes: fence-agents
%description heuristics-resource
Fence pseudo agent used to affect other agents based on
resource-heuristics.
%files heuristics-resource
%{_sbindir}/fence_heuristics_resource
%{_mandir}/man8/fence_heuristics_resource.8*

%package hpblade
License: GPLv2+ and LGPLv2+
Summary: Fence agent for HP BladeSystem devices
Expand Down
1 change: 1 addition & 0 deletions make/fencebuild.mk
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ define gen_agent_from_py
-e 's#@''SNMPGET_PATH@#${SNMPGET_PATH}#g' \
-e 's#@''NOVA_PATH@#${NOVA_PATH}#g' \
-e 's#@''POWERMAN_PATH@#${POWERMAN_PATH}#g' \
-e 's#@''CRM_RESOURCE_PATH@#${CRM_RESOURCE_PATH}#g' \
-e 's#@''PING_CMD@#${PING_CMD}#g' \
-e 's#@''PING6_CMD@#${PING6_CMD}#g' \
-e 's#@''PING4_CMD@#${PING4_CMD}#g' \
Expand Down
105 changes: 105 additions & 0 deletions tests/data/metadata/fence_heuristics_resource.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
<?xml version="1.0" ?>
<resource-agent name="fence_heuristics_resource" shortdesc="Fence agent for resource-heuristic based fencing" >
<longdesc>fence_heuristics_resource uses resource-heuristics to control execution of another fence agent on the same fencing level.

This is not a fence agent by itself! Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristic_resource.</longdesc>
<vendor-url></vendor-url>
<parameters>
<parameter name="action" unique="0" required="1">
<getopt mixed="-o, --action=[action]" />
<content type="string" default="reboot" />
<shortdesc lang="en">Fencing action</shortdesc>
</parameter>
<parameter name="crm_resource_path" unique="0" required="0">
<getopt mixed="--crm-resource-path=[path]" />
<shortdesc lang="en">Path to crm_resource</shortdesc>
</parameter>
<parameter name="method" unique="0" required="0">
<getopt mixed="-m, --method=[method]" />
<content type="select" default="cycle" >
<option value="onoff" />
<option value="cycle" />
</content>
<shortdesc lang="en">Method to fence</shortdesc>
</parameter>
<parameter name="resource" unique="0" required="1">
<getopt mixed="--resource=[resource-id]" />
<content type="string" default="" />
<shortdesc lang="en">Resource ID</shortdesc>
</parameter>
<parameter name="standby_wait" unique="0" required="0">
<getopt mixed="--standby-wait=[seconds]" />
<content type="string" default="0" />
<shortdesc lang="en">Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits.</shortdesc>
</parameter>
<parameter name="quiet" unique="0" required="0">
<getopt mixed="-q, --quiet" />
<content type="boolean" />
<shortdesc lang="en">Disable logging to stderr. Does not affect --verbose or --debug-file or logging to syslog.</shortdesc>
</parameter>
<parameter name="verbose" unique="0" required="0">
<getopt mixed="-v, --verbose" />
<content type="boolean" />
<shortdesc lang="en">Verbose mode</shortdesc>
</parameter>
<parameter name="debug" unique="0" required="0" deprecated="1">
<getopt mixed="-D, --debug-file=[debugfile]" />
<content type="string" />
<shortdesc lang="en">Write debug information to given file</shortdesc>
</parameter>
<parameter name="debug_file" unique="0" required="0" obsoletes="debug">
<getopt mixed="-D, --debug-file=[debugfile]" />
<content type="string" />
<shortdesc lang="en">Write debug information to given file</shortdesc>
</parameter>
<parameter name="version" unique="0" required="0">
<getopt mixed="-V, --version" />
<content type="boolean" />
<shortdesc lang="en">Display version information and exit</shortdesc>
</parameter>
<parameter name="help" unique="0" required="0">
<getopt mixed="-h, --help" />
<content type="boolean" />
<shortdesc lang="en">Display help and exit</shortdesc>
</parameter>
<parameter name="delay" unique="0" required="0">
<getopt mixed="--delay=[seconds]" />
<content type="second" default="0" />
<shortdesc lang="en">Wait X seconds before fencing is started</shortdesc>
</parameter>
<parameter name="login_timeout" unique="0" required="0">
<getopt mixed="--login-timeout=[seconds]" />
<content type="second" default="5" />
<shortdesc lang="en">Wait X seconds for cmd prompt after login</shortdesc>
</parameter>
<parameter name="power_timeout" unique="0" required="0">
<getopt mixed="--power-timeout=[seconds]" />
<content type="second" default="20" />
<shortdesc lang="en">Test X seconds for status change after ON/OFF</shortdesc>
</parameter>
<parameter name="power_wait" unique="0" required="0">
<getopt mixed="--power-wait=[seconds]" />
<content type="second" default="0" />
<shortdesc lang="en">Wait X seconds after issuing ON/OFF</shortdesc>
</parameter>
<parameter name="shell_timeout" unique="0" required="0">
<getopt mixed="--shell-timeout=[seconds]" />
<content type="second" default="3" />
<shortdesc lang="en">Wait X seconds for cmd prompt after issuing command</shortdesc>
</parameter>
<parameter name="retry_on" unique="0" required="0">
<getopt mixed="--retry-on=[attempts]" />
<content type="integer" default="1" />
<shortdesc lang="en">Count of attempts to retry power on</shortdesc>
</parameter>
</parameters>
<actions>
<action name="on" automatic="0"/>
<action name="off" />
<action name="reboot" />
<action name="monitor" />
<action name="metadata" />
<action name="manpage" />
<action name="validate-all" />
</actions>
</resource-agent>